Browse Source

Markdown.pl: refactor initialization and command line

Move one-time initialization into BEGIN blocks.

Avoid running qr(...) more than once on expressions that
do not change (actually Perl should mostly already do this).

Get rid of the kludgy check for command-line and move all
that code into a new _main function and call it only when
being run from the comamnd line.

This seems to have resulted in a very very very tiny speed
boost as well.

Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
master
Kyle J. McKay 8 years ago
parent
commit
f46d48232a
  1. 235
      Markdown.pl

235
Markdown.pl

@ -26,8 +26,6 @@ All rights reserved.
*VERSION = \"1.0.4+" # Sun 05 Jun 2016+ *VERSION = \"1.0.4+" # Sun 05 Jun 2016+
} }
close(DATA) if fileno(DATA);
require Exporter; require Exporter;
use Digest::MD5 qw(md5); use Digest::MD5 qw(md5);
use File::Basename qw(basename); use File::Basename qw(basename);
@ -37,28 +35,57 @@ use Pod::Usage;
@EXPORT_OK = qw(Markdown); @EXPORT_OK = qw(Markdown);
$INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'}; $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE__.'.pm'};
close(DATA) if fileno(DATA);
exit(&_main(@ARGV)||0) unless caller;
## Disabled; causes problems under Perl 5.6.1: ## Disabled; causes problems under Perl 5.6.1:
# use utf8; # use utf8;
# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
sub block_id;
# #
# Global default settings: # Global default settings:
# #
my $g_empty_element_suffix = " />"; # Change to ">" for HTML output my ($g_empty_element_suffix, $g_tab_width);
my $g_tab_width = 4; # Legacy even though it's wrong BEGIN {
$g_empty_element_suffix = " />"; # Change to ">" for HTML output
$g_tab_width = 4; # Legacy even though it's wrong
}
# #
# Globals: # Globals:
# #
# Permanent block id table
my %g_perm_block_ids;
# Global hashes, used by various utility routines
my %g_urls;
my %g_titles;
my %g_block_ids;
my %g_html_blocks;
my %opt;
# Return a "block id" to use to identify the block that does not contain
# any characters that could be misinterpreted by the rest of the code
# Originally this used md5_hex but that's unnecessarily slow
# Instead just use the refaddr of the scalar ref of the entry for that
# key in either the global or, if the optional second argument is true,
# permanent table. To avoid the result being confused with anything
# else, it's prefixed with a control character and suffixed with another
# both of which are not allowed by the XML standard or Unicode.
sub block_id {
$_[1] ?
"\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
"\5".refaddr(\$g_block_ids{$_[0]})."\6";
}
# Regex to match balanced [brackets]. See Friedl's # Regex to match balanced [brackets]. See Friedl's
# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
my $g_nested_brackets; my $g_nested_brackets;
$g_nested_brackets = qr{ BEGIN {
$g_nested_brackets = qr{
(?> # Atomic matching (?> # Atomic matching
[^\[\]]+ # Anything other than brackets [^\[\]]+ # Anything other than brackets
| |
@ -66,37 +93,41 @@ $g_nested_brackets = qr{
(??{ $g_nested_brackets }) # Recursive set of nested brackets (??{ $g_nested_brackets }) # Recursive set of nested brackets
\] \]
)* )*
}x; }ox
}
# Table of hash values for escaped characters: # Table of hash values for escaped characters:
my %g_escape_table; my %g_escape_table;
foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") { BEGIN {
$g_escape_table{$char} = block_id($char,1); foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") {
$g_escape_table{$char} = block_id($char,1);
}
} }
# Permanent block id table
my %g_perm_block_ids;
# Global hashes, used by various utility routines
my %g_urls;
my %g_titles;
my %g_block_ids;
my %g_html_blocks;
my %opt;
# Used to track when we're inside an ordered or unordered list # Used to track when we're inside an ordered or unordered list
# (see _ProcessListItems() for details): # (see _ProcessListItems() for details):
my $g_list_level = 0; my $g_list_level;
BEGIN {
$g_list_level = 0;
}
#### Blosxom plug-in interface ########################################## #### Blosxom plug-in interface ##########################################
my $_haveBX;
BEGIN {
no warnings 'once';
$_haveBX = defined($blosxom::version);
}
# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
# which posts Markdown should process, using a "meta-markup: markdown" # which posts Markdown should process, using a "meta-markup: markdown"
# header. If it's set to 0 (the default), Markdown will process all # header. If it's set to 0 (the default), Markdown will process all
# entries. # entries.
my $g_blosxom_use_meta = 0; my $g_blosxom_use_meta;
BEGIN {
$g_blosxom_use_meta = 0;
}
sub start { 1; } sub start { 1; }
sub story { sub story {
@ -112,15 +143,16 @@ sub story {
#### Movable Type plug-in interface ##################################### #### Movable Type plug-in interface #####################################
eval {require MT}; # Test to see if we're running in MT. my $_haveMT = eval {require MT; 1;}; # Test to see if we're running in MT
unless ($@) { my $_haveMT3 = $_haveMT && eval {require MT::Plugin; 1;}; # and MT >= MT 3.0.
unless ($_haveMT) {
require MT; require MT;
import MT; import MT;
require MT::Template::Context; require MT::Template::Context;
import MT::Template::Context; import MT::Template::Context;
eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. unless ($_haveMT3) {
unless ($@) {
require MT::Plugin; require MT::Plugin;
import MT::Plugin; import MT::Plugin;
my $plugin = new MT::Plugin({ my $plugin = new MT::Plugin({
@ -203,88 +235,71 @@ unless ($@) {
}); });
} }
} }
elsif (!caller) {
#### BBEdit/command-line text filter interface ##########################
# Needs to be hidden from MT (and Blosxom when running in static mode).
# We're only using $blosxom::version once; tell Perl not to warn us: #### BBEdit/command-line text filter interface ##########################
no warnings 'once'; sub _main {
unless ( defined($blosxom::version) ) { local *ARGV = \@_;
use warnings;
#### Check for command-line switches: ################# #### Check for command-line switches: #################
my %options = (); my %options = ();
my %cli_opts; my %cli_opts;
use Getopt::Long; use Getopt::Long;
Getopt::Long::Configure(qw(bundling require_order pass_through)); Getopt::Long::Configure(qw(bundling require_order pass_through));
GetOptions(\%cli_opts, GetOptions(\%cli_opts,
'help','h', 'help','h',
'version|V', 'version|V',
'shortversion|short-version|s', 'shortversion|short-version|s',
'html4tags', 'html4tags',
'htmlroot|r=s', 'htmlroot|r=s',
'imageroot|i=s', 'imageroot|i=s',
); );
if ($cli_opts{'help'}) { if ($cli_opts{'help'}) {
pod2usage(-verbose => 2, -exitval => 0); pod2usage(-verbose => 2, -exitval => 0);
} }
if ($cli_opts{'h'}) { if ($cli_opts{'h'}) {
pod2usage(-verbose => 0, -exitval => 0); pod2usage(-verbose => 0, -exitval => 0);
} }
if ($cli_opts{'version'}) { # Version info if ($cli_opts{'version'}) { # Version info
print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT; print "\nThis is Markdown, version $VERSION.\n", $COPYRIGHT;
print "License is Modified BSD (aka 3-clause BSD) License\n"; print "License is Modified BSD (aka 3-clause BSD) License\n";
print "<https://opensource.org/licenses/BSD-3-Clause>\n"; print "<https://opensource.org/licenses/BSD-3-Clause>\n";
exit 0; exit 0;
} }
if ($cli_opts{'shortversion'}) { # Just the version number string. if ($cli_opts{'shortversion'}) { # Just the version number string.
print $VERSION; print $VERSION;
exit 0; exit 0;
} }
if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
$options{empty_element_suffix} = ">"; $options{empty_element_suffix} = ">";
} }
if ($cli_opts{'htmlroot'}) { # Use URL prefix if ($cli_opts{'htmlroot'}) { # Use URL prefix
$options{url_prefix} = $cli_opts{'htmlroot'}; $options{url_prefix} = $cli_opts{'htmlroot'};
} }
if ($cli_opts{'imageroot'}) { # Use image URL prefix if ($cli_opts{'imageroot'}) { # Use image URL prefix
$options{img_prefix} = $cli_opts{'imageroot'}; $options{img_prefix} = $cli_opts{'imageroot'};
} }
#### Process incoming text: ########################### #### Process incoming text: ###########################
for (;;) { for (;;) {
local $_; local $_;
{ {
local $/; # Slurp the whole file local $/; # Slurp the whole file
$_ = <>; $_ = <>;
}
defined($_) or last;
print Markdown($_, \%options);
} }
exit 0; defined($_) or last;
print Markdown($_, \%options);
} }
}
# Return a "block id" to use to identify the block that does not contain exit 0;
# any characters that could be misinterpreted by the rest of the code
# Originally this used md5_hex but that's unnecessarily slow
# Instead just use the refaddr of the scalar ref of the entry for that
# key in either the global or, if the optional second argument is true,
# permanent table. To avoid the result being confused with anything
# else, it's prefixed with a control character and suffixed with another
# both of which are not allowed by the XML standard or Unicode.
sub block_id {
$_[1] ?
"\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" :
"\5".refaddr(\$g_block_ids{$_[0]})."\6";
} }
sub Markdown { sub Markdown {
# #
# Main function. The order in which other subs are called here is # Primary function. The order in which other subs are called here is
# essential. Link and image substitutions need to happen before # essential. Link and image substitutions need to happen before
# _EscapeSpecialChars(), so that any *'s or _'s in the <a> # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
# and <img> tags get encoded. # and <img> tags get encoded.
@ -426,6 +441,11 @@ sub _StripLinkDefinitions {
return $text; return $text;
} }
my ($block_tags_a, $block_tags_b);
BEGIN {
$block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/o;
$block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/o;
}
sub _HashHTMLBlocks { sub _HashHTMLBlocks {
my $text = shift; my $text = shift;
@ -437,8 +457,6 @@ sub _HashHTMLBlocks {
# "paragraphs" that are wrapped in non-block-level tags, such as anchors, # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is # phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded: # hard-coded:
my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
# First, look for nested blocks, e.g.: # First, look for nested blocks, e.g.:
# <div> # <div>
@ -882,6 +900,14 @@ sub _DoHeaders {
} }
my ($marker_ul, $marker_ol, $marker_any);
BEGIN {
# Re-usable patterns to match list item bullets and number markers:
$marker_ul = qr/[*+-]/o;
$marker_ol = qr/\d+[.]/o;
$marker_any = qr/(?:$marker_ul|$marker_ol)/o;
}
sub _DoLists { sub _DoLists {
# #
# Form HTML ordered (numbered) and unordered (bulleted) lists. # Form HTML ordered (numbered) and unordered (bulleted) lists.
@ -889,11 +915,6 @@ sub _DoLists {
my $text = shift; my $text = shift;
my $less_than_tab = $opt{tab_width} - 1; my $less_than_tab = $opt{tab_width} - 1;
# Re-usable patterns to match list item bullets and number markers:
my $marker_ul = qr/[*+-]/;
my $marker_ol = qr/\d+[.]/;
my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
# Re-usable pattern to match any entirel ul or ol list: # Re-usable pattern to match any entirel ul or ol list:
my $whole_list = qr{ my $whole_list = qr{
( # $1 = whole list ( # $1 = whole list
@ -1132,13 +1153,7 @@ sub _EncodeCode {
# Encode $'s, but only if we're running under Blosxom. # Encode $'s, but only if we're running under Blosxom.
# (Blosxom interpolates Perl variables in article bodies.) # (Blosxom interpolates Perl variables in article bodies.)
{ s/\$/&#036;/g if $_haveBX;
no warnings 'once';
if (defined($blosxom::version)) {
s/\$/&#036;/g;
}
}
# Do the angle bracket song and dance: # Do the angle bracket song and dance:
s! < !&lt;!gx; s! < !&lt;!gx;
@ -1396,7 +1411,7 @@ sub _TokenizeHTML {
my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
(?s: <\? .*? \?> ) | # processing instruction (?s: <\? .*? \?> ) | # processing instruction
$nested_tags/ix; # nested tags $nested_tags/iox; # nested tags
while ($str =~ m/($match)/g) { while ($str =~ m/($match)/g) {
my $whole_tag = $1; my $whole_tag = $1;

Loading…
Cancel
Save