diff --git a/Markdown.pl b/Markdown.pl index fc0b23e..9d8ee9a 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -29,8 +29,9 @@ All rights reserved. close(DATA) if fileno(DATA); require Exporter; -use Digest::MD5 qw(md5 md5_hex); +use Digest::MD5 qw(md5); use File::Basename qw(basename); +use Scalar::Util qw(refaddr); use Pod::Usage; @ISA = qw(Exporter); @EXPORT_OK = qw(Markdown); @@ -41,6 +42,7 @@ $INC{__PACKAGE__.'.pm'} = $INC{basename(__FILE__)} unless exists $INC{__PACKAGE_ # use utf8; # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html +sub block_id; # # Global default settings: @@ -70,13 +72,16 @@ $g_nested_brackets = qr{ # Table of hash values for escaped characters: my %g_escape_table; foreach my $char (split //, "\\\`*_~{}[]()>#+-.!") { - $g_escape_table{$char} = md5_hex($char); + $g_escape_table{$char} = block_id($char,1); } +# Permanent block id table +my %g_perm_block_ids; # Global hashes, used by various utility routines my %g_urls; my %g_titles; +my %g_block_ids; my %g_html_blocks; my %opt; @@ -262,6 +267,20 @@ elsif (!caller) { } +# Return a "block id" to use to identify the block that does not contain +# any characters that could be misinterpreted by the rest of the code. +# Originally this used md5_hex but that's unnecessarily slow. +# Instead just use the refaddr of the scalar ref of the entry for that +# key in either the global or, if the optional second argument is true, +# permanent table. To avoid the result being confused with anything +# else, it's prefixed with a control character and suffixed with another, +# both of which are C0 control characters not allowed by the XML 1.0 standard. +sub block_id { + $_[1] ? 
+ "\2".refaddr(\$g_perm_block_ids{$_[0]})."\3" : + "\5".refaddr(\$g_block_ids{$_[0]})."\6"; +} + sub Markdown { # @@ -298,6 +317,7 @@ sub Markdown { # articles): %g_urls = (); %g_titles = (); + %g_block_ids = (); %g_html_blocks = (); $g_list_level = 0; @@ -359,7 +379,7 @@ sub _HashBTCodeBlocks { $codeblock = _EncodeCode($codeblock); # or run highlighter here $codeblock = "
" . $codeblock . "\n
";
- my $key = md5_hex($codeblock);
+ my $key = block_id($codeblock);
$g_html_blocks{$key} = $codeblock;
"\n\n" . $key . "\n\n";
}egmx;
@@ -442,7 +462,7 @@ sub _HashHTMLBlocks {
(?=\n+|\Z) # followed by a newline or end of document
)
}{
- my $key = md5_hex($1);
+ my $key = block_id($1);
$g_html_blocks{$key} = $1;
"\n\n" . $key . "\n\n";
}egmx;
@@ -462,7 +482,7 @@ sub _HashHTMLBlocks {
(?=\n+|\Z) # followed by a newline or end of document
)
}{
- my $key = md5_hex($1);
+ my $key = block_id($1);
$g_html_blocks{$key} = $1;
"\n\n" . $key . "\n\n";
}egmx;
@@ -484,7 +504,7 @@ sub _HashHTMLBlocks {
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}{
- my $key = md5_hex($1);
+ my $key = block_id($1);
$g_html_blocks{$key} = $1;
"\n\n" . $key . "\n\n";
}egx;
@@ -507,7 +527,7 @@ sub _HashHTMLBlocks {
(?=\n{2,}|\Z) # followed by a blank line or end of document
)
}{
- my $key = md5_hex($1);
+ my $key = block_id($1);
$g_html_blocks{$key} = $1;
"\n\n" . $key . "\n\n";
}egx;
@@ -527,9 +547,9 @@ sub _RunBlockGamut {
$text = _DoHeaders($text);
# Do Horizontal Rules:
- $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n