Markdown.pl: refactor initialization and command line

Move one-time initialization into BEGIN blocks. Avoid running qr(...) more than once on expressions that do not change (actually Perl should mostly already do this). Get rid of the kludgy check for command-line and move all that code into a new _main function and call it only when being run from the comamnd line. This seems to have resulted in a very very very tiny speed boost as well. Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
9 years ago · f46d48232a
1 changed files with 125 additions and 110 deletions
--- a/Markdown.pl
+++ b/Markdown.pl
@ -26,8 +26,6 @@ All rights reserved.
 *VERSION = \"1.0.4+" # Sun 05 Jun 2016+
 }
 close(DATA) if fileno(DATA);
 require Exporter;
 use Digest::MD5 qw(md5);
 use File::Basename qw(basename);
@ -37,27 +35,56 @@ use Pod::Usage;
@EXPORT_OK = qw(Markdown);
 $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'};
 close(DATA) if fileno(DATA);
 exit(&_main(@ARGV)||0) unless caller;
 ## Disabled; causes problems under Perl 5.6.1:
 # use utf8;
 # binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
 sub block_id;
 #
 # Global default settings:
 #
-my $g_empty_element_suffix = " />";	# Change to ">" for HTML output
+my ($g_empty_element_suffix, $g_tab_width);
-my $g_tab_width = 4;			# Legacy even though it's wrong
+BEGIN {
    $g_empty_element_suffix = " />";	# Change to ">" for HTML output
    $g_tab_width = 4;			# Legacy even though it's wrong
 }
 #
 # Globals:
 #
 # Permanent block id table
 my %g_perm_block_ids;
 # Global hashes, used by various utility routines
 my %g_urls;
 my %g_titles;
 my %g_block_ids;
 my %g_html_blocks;
 my %opt;
 # Return a "block id" to use to identify the block that does not contain
 # any characters that could be misinterpreted by the rest of the code
 # Originally this used md5_hex but that's unnecessarily slow
 # Instead just use the refaddr of the scalar ref of the entry for that
 # key in either the global or, if the optional second argument is true,
 # permanent table.  To avoid the result being confused with anything
 # else, it's prefixed with a control character and suffixed with another
 # both of which are not allowed by the XML standard or Unicode.
 sub block_id {
    $_[1] ?
    "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
    "\5".refaddr(\$g_block_ids{$_[0]})."\6";
 }
 # Regex to match balanced [brackets]. See Friedl's
 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
 my $g_nested_brackets;
 BEGIN {
    $g_nested_brackets = qr{
    (?>					# Atomic matching
 	[^\[\]]+			# Anything other than brackets
@ -66,37 +93,41 @@ $g_nested_brackets = qr{
 	    (??{ $g_nested_brackets })	# Recursive set of nested brackets
 	\]
    )*
-}x;
+    }ox
 }
 # Table of hash values for escaped characters:
 my %g_escape_table;
 BEGIN {
    foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") {
 	$g_escape_table{$char} = block_id($char,1);
    }
-
+}
 # Permanent block id table
 my %g_perm_block_ids;
 # Global hashes, used by various utility routines
 my %g_urls;
 my %g_titles;
 my %g_block_ids;
 my %g_html_blocks;
 my %opt;
 # Used to track when we're inside an ordered or unordered list
 # (see _ProcessListItems() for details):
-my $g_list_level = 0;
+my $g_list_level;
 BEGIN {
    $g_list_level = 0;
 }
 #### Blosxom plug-in interface ##########################################
 my $_haveBX;
 BEGIN {
    no warnings 'once';
    $_haveBX = defined($blosxom::version);
 }
 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 # which posts Markdown should process, using a "meta-markup: markdown"
 # header. If it's set to 0 (the default), Markdown will process all
 # entries.
-my $g_blosxom_use_meta = 0;
+my $g_blosxom_use_meta;
 BEGIN {
    $g_blosxom_use_meta = 0;
 }
 sub start { 1; }
 sub story {
@ -112,15 +143,16 @@ sub story {
 #### Movable Type plug-in interface #####################################
-eval {require MT};  # Test to see if we're running in MT.
+my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
-unless ($@) {
+my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.
 unless ($_haveMT) {
    require MT;
    import  MT;
    require MT::Template::Context;
    import  MT::Template::Context;
-    eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
+    unless ($_haveMT3) {
    unless ($@) {
 	require MT::Plugin;
 	import  MT::Plugin;
 	my $plugin = new MT::Plugin({
@ -203,14 +235,11 @@ unless ($@) {
 	});
    }
 }
-elsif (!caller) {
+
 #### BBEdit/command-line text filter interface ##########################
-# Needs to be hidden from MT (and Blosxom when running in static mode).
+sub _main {
    local *ARGV = \@_;
    # We're only using $blosxom::version once; tell Perl not to warn us:
    no warnings 'once';
    unless ( defined($blosxom::version) ) {
 	use warnings;
    #### Check for command-line switches: #################
    my %options = ();
@ -262,29 +291,15 @@ elsif (!caller) {
 	defined($_) or last;
 	print Markdown($_, \%options);
    }
 	exit 0;
    }
 }
-# Return a "block id" to use to identify the block that does not contain
+    exit 0;
 # any characters that could be misinterpreted by the rest of the code
 # Originally this used md5_hex but that's unnecessarily slow
 # Instead just use the refaddr of the scalar ref of the entry for that
 # key in either the global or, if the optional second argument is true,
 # permanent table.  To avoid the result being confused with anything
 # else, it's prefixed with a control character and suffixed with another
 # both of which are not allowed by the XML standard or Unicode.
 sub block_id {
    $_[1] ?
    "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
    "\5".refaddr(\$g_block_ids{$_[0]})."\6";
 }
 sub Markdown {
 #
-# Main function. The order in which other subs are called here is
+# Primary function. The order in which other subs are called here is
 # essential. Link and image substitutions need to happen before
 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 # and <img> tags get encoded.
@ -426,6 +441,11 @@ sub _StripLinkDefinitions {
    return $text;
 }
 my ($block_tags_a, $block_tags_b);
 BEGIN {
    $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
    $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
 }
 sub _HashHTMLBlocks {
    my $text = shift;
@ -437,8 +457,6 @@ sub _HashHTMLBlocks {
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded:
    my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
    my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
    # First, look for nested blocks, e.g.:
    #   <div>
@ -882,6 +900,14 @@ sub _DoHeaders {
 }
 my ($marker_ul, $marker_ol, $marker_any);
 BEGIN {
    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul  = qr/[*+-]/o;
    $marker_ol  = qr/\d+[.]/o;
    $marker_any = qr/(?:$marker_ul|$marker_ol)/o;
 }
 sub _DoLists {
 #
 # Form HTML ordered (numbered) and unordered (bulleted) lists.
@ -889,11 +915,6 @@ sub _DoLists {
    my $text = shift;
    my $less_than_tab = $opt{tab_width} - 1;
    # Re-usable patterns to match list item bullets and number markers:
    my $marker_ul  = qr/[*+-]/;
    my $marker_ol  = qr/\d+[.]/;
    my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
    # Re-usable pattern to match any entirel ul or ol list:
    my $whole_list = qr{
 	(			    # $1 = whole list
@ -1132,13 +1153,7 @@ sub _EncodeCode {
    # Encode $'s, but only if we're running under Blosxom.
    # (Blosxom interpolates Perl variables in article bodies.)
-    {
+    s/\$/&#036;/g if $_haveBX;
 	no warnings 'once';
    if (defined($blosxom::version)) {
 	s/\$/&#036;/g;
    }
    }
    # Do the angle bracket song and dance:
    s! <  !&lt;!gx;
@ -1396,7 +1411,7 @@ sub _TokenizeHTML {
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
    my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
 		   (?s: <\? .*? \?> ) |		    # processing instruction
-		   $nested_tags/ix;		    # nested tags
+		   $nested_tags/iox;		    # nested tags
    while ($str =~ m/($match)/g) {
 	my $whole_tag = $1;