From 2d4f04ac8ccdbbff6cea435d148b8c4a4e3f835a Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Thu, 3 Sep 2015 02:19:22 -0700 Subject: [PATCH] Markdown.pl: fix tab setting A tab indents 8 spaces. Period. The coding style for this file is an indent of 4. Correct the assume-tabs-are-broken-and-only-4-spaces problem. Signed-off-by: Kyle J. McKay --- Markdown.pl | 1926 +++++++++++++++++++++++++-------------------------- 1 file changed, 963 insertions(+), 963 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index a9ce2e2..4931851 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -3,8 +3,8 @@ # # Markdown -- A text-to-HTML conversion tool for web writers # -# Copyright (c) 2004 John Gruber -# +# Copyright (C) 2004 John Gruber +# Copyright (C) 2015 Kyle J. McKay # @@ -26,7 +26,7 @@ $VERSION = '1.0.1'; # # Global default settings: # -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output my $g_tab_width = 4; @@ -38,20 +38,20 @@ my $g_tab_width = 4; # "Mastering Regular Expressions", 2nd Ed., pp. 328-331. my $g_nested_brackets; $g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* }x; # Table of hash values for escaped characters: my %g_escape_table; foreach my $char (split //, '\\`*_{}[]()>#+-.!~') { - $g_escape_table{$char} = md5_hex($char); + $g_escape_table{$char} = md5_hex($char); } @@ -75,12 +75,12 @@ my $g_blosxom_use_meta = 0; sub start { 1; } sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - if ( (! $g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); } 1; } @@ -94,135 +94,135 @@ unless ($@) { require MT::Template::Context; import MT::Template::Context; - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "Markdown", - description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", - doc_link => 'http://daringfireball.net/projects/markdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('markdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('markdown' => { - label => 'Markdown', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('markdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('markdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? $text : Markdown($text); - $text; - }, + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' }); + MT->add_plugin( $plugin ); + } - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); } - if ($smartypants) { - MT->add_text_filter('markdown_with_smartypants' => { - label => 'Markdown With SmartyPants', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } } else { #### BBEdit/command-line text filter interface ########################## # Needs to be hidden from MT (and Blosxom when running in static mode). # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; + no warnings 'once'; unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'h', - 'help', - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'help'} || $cli_opts{'h'}) { - exec 'perldoc', $0; - } - if ($cli_opts{'version'}) { # Version info - print "\nThis is Markdown, version $VERSION.\n"; - print "Copyright (C) 2004 John Gruber\n"; - print "Copyright (C) 2015 Kyle J. McKay\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'h', + 'help', + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'help'} || $cli_opts{'h'}) { + exec 'perldoc', $0; + } + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright (C) 2004 John Gruber\n"; + print "Copyright (C) 2015 Kyle J. McKay\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print Markdown($text); + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text); } } @@ -235,44 +235,44 @@ sub Markdown { # _EscapeSpecialChars(), so that any *'s or _'s in the # and tags get encoded. # - my $text = shift; + my $text = shift; - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; - # Convert all tabs to spaces. - $text = _Detab($text); + # Convert all tabs to spaces. + $text = _Detab($text); - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); - # Strip link definitions, store in hashes. - $text = _StripLinkDefinitions($text); + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); - $text = _RunBlockGamut($text); + $text = _RunBlockGamut($text); - $text = _UnescapeSpecialChars($text); + $text = _UnescapeSpecialChars($text); - return $text . "\n"; + return $text . "\n"; } @@ -281,147 +281,147 @@ sub _StripLinkDefinitions { # Strips link definitions from text, stores the URLs and titles in # hash references. # - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - ? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - (?:\n+|\Z) - } - {}mx) { - $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; - } + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; } + } - return $text; + return $text; } sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - - # First, look for nested blocks, e.g.: - #

- #
- # tags for inner block must be indented. - #
- #
- # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `
` and stop at the first `
`. - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_a) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - - - # - # Now match more liberally, simply from `\n` to `\n` - # - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_b) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .* # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - # Special case just for
. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; + my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + + # First, look for nested blocks, e.g.: + #

+ #
+ # tags for inner block must be indented. + #
+ #
+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + + + # + # Now match more liberally, simply from `\n` to `\n` + # + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + # Special case just for
. It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; } @@ -430,30 +430,30 @@ sub _RunBlockGamut { # These are all the transformations that form block-level # tags like paragraphs, headers, and list items. # - my $text = shift; + my $text = shift; - $text = _DoHeaders($text); + $text = _DoHeaders($text); - # Do Horizontal Rules: - $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. - $text = _HashHTMLBlocks($text); + # We already ran _HashHTMLBlocks() before, in Markdown(), but that + # was to escape raw HTML in the original Markdown source. This time, + # we're escaping the markup we've just created, so that we don't wrap + #

tags around block-level tags. + $text = _HashHTMLBlocks($text); - $text = _FormParagraphs($text); + $text = _FormParagraphs($text); - return $text; + return $text; } @@ -462,60 +462,60 @@ sub _RunSpanGamut { # These are all the transformations that occur *within* block-level # tags like paragraphs, headers, and list items. # - my $text = shift; + my $text = shift; - $text = _DoCodeSpans($text); + $text = _DoCodeSpans($text); - $text = _EscapeSpecialChars($text); + $text = _EscapeSpecialChars($text); - # Process anchor and image tags. Images must come first, - # because ![foo][f] looks like an anchor. - $text = _DoImages($text); - $text = _DoAnchors($text); + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); - # Make links out of things like `` - # Must come after _DoAnchors(), because you can use < and > - # delimiters in inline links like [this](). - $text = _DoAutoLinks($text); + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = _DoAutoLinks($text); - $text = _EncodeAmpsAndAngles($text); + $text = _EncodeAmpsAndAngles($text); - $text = _DoItalicsAndBoldAndStrike($text); + $text = _DoItalicsAndBoldAndStrike($text); - # Do hard breaks: - $text =~ s/ {2,}\n/ or tags. -# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; - - foreach my $cur_token (@$tokens) { - if ($cur_token->[0] eq "tag") { - # Within tags, encode *, _ and ~ so they don't conflict - # with their use in Markdown for italics and strong. - # We're replacing each such character with its - # corresponding MD5 checksum value; this is likely - # overkill, but it should prevent us from colliding - # with the escape values by accident. - $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; - $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; - $cur_token->[1] =~ s! ~ !$g_escape_table{'~'}!gx; - $text .= $cur_token->[1]; - } else { - my $t = $cur_token->[1]; - $t = _EncodeBackslashEscapes($t); - $text .= $t; - } + my $text = shift; + my $tokens ||= _TokenizeHTML($text); + + $text = ''; # rebuild $text from the tokens +# my $in_pre = 0; # Keep track of when we're inside

 or  tags.
+#   my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
+
+    foreach my $cur_token (@$tokens) {
+	if ($cur_token->[0] eq "tag") {
+	    # Within tags, encode *, _ and ~ so they don't conflict
+	    # with their use in Markdown for italics and strong.
+	    # We're replacing each such character with its
+	    # corresponding MD5 checksum value; this is likely
+	    # overkill, but it should prevent us from colliding
+	    # with the escape values by accident.
+	    $cur_token->[1] =~	s! \* !$g_escape_table{'*'}!gx;
+	    $cur_token->[1] =~	s! _  !$g_escape_table{'_'}!gx;
+	    $cur_token->[1] =~	s! ~  !$g_escape_table{'~'}!gx;
+	    $text .= $cur_token->[1];
+	} else {
+	    my $t = $cur_token->[1];
+	    $t = _EncodeBackslashEscapes($t);
+	    $text .= $t;
 	}
-	return $text;
+    }
+    return $text;
 }
 
 
@@ -523,100 +523,100 @@ sub _DoAnchors {
 #
 # Turn Markdown link shortcuts into XHTML  tags.
 #
-	my $text = shift;
-
-	#
-	# First, handle reference-style links: [link text] [id]
-	#
-	$text =~ s{
-		(					# wrap whole match in $1
-		  \[
-		    ($g_nested_brackets)	# link text = $2
-		  \]
-
-		  [ ]?				# one optional space
-		  (?:\n[ ]*)?		# one optional newline followed by spaces
-
-		  \[
-		    (.*?)		# id = $3
-		  \]
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $link_text   = $2;
-		my $link_id     = lc $3;
-
-		if ($link_id eq "") {
-			$link_id = lc $link_text;     # for shortcut links like [this][].
-		}
-
-		if (defined $g_urls{$link_id}) {
-			my $url = $g_urls{$link_id};
-			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics, bold
-			$url =~ s!  ~ !$g_escape_table{'~'}!gx;		# and strike through.
-			$result = "?	# href = $3
-			[ \t]*
-			(			# $4
-			  (['"])	# quote char = $5
-			  (.*?)		# Title = $6
-			  \5		# matching quote
-			)?			# title is optional
-		  \)
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $link_text   = $2;
-		my $url		= $3;
-		my $title	= $6;
-
-		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics, bold
-		$url =~ s!  ~ !$g_escape_table{'~'}!gx;		# and strike through.
-		$result = "?	# href = $3
+	    [ \t]*
+	    (		# $4
+	      (['"])	# quote char = $5
+	      (.*?)	# Title = $6
+	      \5	# matching quote
+	    )?		# title is optional
+	  \)
+	)
+    }{
+	my $result;
+	my $whole_match = $1;
+	my $link_text	= $2;
+	my $url	    = $3;
+	my $title   = $6;
+
+	$url =~ s! \* !$g_escape_table{'*'}!gx;	    # We've got to encode these to avoid
+	$url =~ s!  _ !$g_escape_table{'_'}!gx;	    # conflicting with italics, bold
+	$url =~ s!  ~ !$g_escape_table{'~'}!gx;	    # and strike through.
+	$result = " tags.
 #
-	my $text = shift;
-
-	#
-	# First, handle reference-style labeled images: ![alt text][id]
-	#
-	$text =~ s{
-		(				# wrap whole match in $1
-		  !\[
-		    (.*?)		# alt text = $2
-		  \]
-
-		  [ ]?				# one optional space
-		  (?:\n[ ]*)?		# one optional newline followed by spaces
-
-		  \[
-		    (.*?)		# id = $3
-		  \]
-
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $alt_text    = $2;
-		my $link_id     = lc $3;
-
-		if ($link_id eq "") {
-			$link_id = lc $alt_text;     # for shortcut links like ![this][].
-		}
-
-		$alt_text =~ s/"/"/g;
-		if (defined $g_urls{$link_id}) {
-			my $url = $g_urls{$link_id};
-			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics, bold
-			$url =~ s!  ~ !$g_escape_table{'~'}!gx;		# and strike through.
-			$result = "\"$alt_text\"";?	# src url = $3
-			[ \t]*
-			(			# $4
-			  (['"])	# quote char = $5
-			  (.*?)		# title = $6
-			  \5		# matching quote
-			  [ \t]*
-			)?			# title is optional
-		  \)
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $alt_text    = $2;
-		my $url		= $3;
-		my $title	= '';
-		if (defined($6)) {
-			$title	= $6;
-		}
+	$result;
+    }xsge;
+
+    #
+    # Next, handle inline images:  ![alt text](url "optional title")
+    # Don't forget: encode * and _
+
+    $text =~ s{
+	(		# wrap whole match in $1
+	  !\[
+	    (.*?)	# alt text = $2
+	  \]
+	  \(		# literal paren
+	    [ \t]*
+	    ?	# src url = $3
+	    [ \t]*
+	    (		# $4
+	      (['"])	# quote char = $5
+	      (.*?)	# title = $6
+	      \5	# matching quote
+	      [ \t]*
+	    )?		# title is optional
+	  \)
+	)
+    }{
+	my $result;
+	my $whole_match = $1;
+	my $alt_text	= $2;
+	my $url	    = $3;
+	my $title   = '';
+	if (defined($6)) {
+	    $title  = $6;
+	}
 
-		$alt_text =~ s/"/"/g;
-		$title    =~ s/"/"/g;
-		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics, bold
-		$url =~ s!  ~ !$g_escape_table{'~'}!gx;		# and strike through.
-		$result = "\"$alt_text\"";"  .  _RunSpanGamut($1)  .  "\n\n";
-	}egmx;
-
-	$text =~ s{ ^(?:-+[ \t]*\n)?(.+)[ \t]*\n-+[ \t]*\n+ }{
-		"

" . _RunSpanGamut($1) . "

\n\n"; - }egmx; - - $text =~ s{ ^(?:~+[ \t]*\n)?(.+)[ \t]*\n~+[ \t]*\n+ }{ - "

" . _RunSpanGamut($1) . "

\n\n"; + my $text = shift; + + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + # + # Header 3 + # ~~~~~~~~ + # + $text =~ s{ ^(?:=+[ \t]*\n)?(.+)[ \t]*\n=+[ \t]*\n+ }{ + "

" . _RunSpanGamut($1) . "

\n\n"; + }egmx; + + $text =~ s{ ^(?:-+[ \t]*\n)?(.+)[ \t]*\n-+[ \t]*\n+ }{ + "

" . _RunSpanGamut($1) . "

\n\n"; + }egmx; + + $text =~ s{ ^(?:~+[ \t]*\n)?(.+)[ \t]*\n~+[ \t]*\n+ }{ + "

" . _RunSpanGamut($1) . "

\n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + \#* # optional closing #'s (not counted) + \n+ + }{ + my $h_level = length($1); + "" . _RunSpanGamut($2) . "\n\n"; }egmx; - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - # - $text =~ s{ - ^(\#{1,6}) # $1 = string of #'s - [ \t]* - (.+?) # $2 = Header text - [ \t]* - \#* # optional closing #'s (not counted) - \n+ - }{ - my $h_level = length($1); - "" . _RunSpanGamut($2) . "\n\n"; - }egmx; - - return $text; + return $text; } @@ -782,235 +782,235 @@ sub _DoLists { # # Form HTML ordered (numbered) and unordered (bulleted) lists. # - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Re-usable patterns to match list item bullets and number markers: - my $marker_ul = qr/[*+-]/; - my $marker_ol = qr/\d+[.]/; - my $marker_any = qr/(?:$marker_ul|$marker_ol)/; - - # Re-usable pattern to match any entirel ul or ol list: - my $whole_list = qr{ - ( # $1 = whole list - ( # $2 - [ ]{0,$less_than_tab} - (${marker_any}) # $3 = first list item marker - [ \t]+ - ) - (?s:.+?) - ( # $4 - \z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ \t]* - ${marker_any}[ \t]+ - ) - ) - ) - }mx; - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _ProcessListItems(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if ($g_list_level) { - $text =~ s{ - ^ - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; - $result; - }egmx; - } - else { - $text =~ s{ - (?:(?<=\n\n)|\A\n?) - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "\n"; - $result; - }egmx; - } + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if ($g_list_level) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } + else { + $text =~ s{ + (?:(?<=\n\n)|\A\n?) + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } - return $text; + return $text; } sub _ProcessListItems { # -# Process the contents of a single ordered or unordered list, splitting it -# into individual list items. +# Process the contents of a single ordered or unordered list, splitting it +# into individual list items. # - my $list_str = shift; - my $marker_any = shift; - - - # The $g_list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - - $g_list_level++; - - # trim trailing blank lines: - $list_str =~ s/\n{2,}\z/\n/; - - - $list_str =~ s{ - (\n)? # leading line = $1 - (^[ \t]*) # leading whitespace = $2 - ($marker_any) [ \t]+ # list marker = $3 - ((?s:.+?) # list item text = $4 - (\n{1,2})) - (?= \n* (\z | \2 ($marker_any) [ \t]+)) - }{ - my $item = $4; - my $leading_line = $1; - my $leading_space = $2; - - if ($leading_line or ($item =~ m/\n{2,}/)) { - $item = _RunBlockGamut(_Outdent($item)); - } - else { - # Recursion for sub-lists: - $item = _DoLists(_Outdent($item)); - chomp $item; - $item = _RunSpanGamut($item); - } + my $list_str = shift; + my $marker_any = shift; + + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". + + $g_list_level++; + + # trim trailing blank lines: + $list_str =~ s/\n{2,}\z/\n/; + + + $list_str =~ s{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ($marker_any) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ($marker_any) [ \t]+)) + }{ + my $item = $4; + my $leading_line = $1; + my $leading_space = $2; + + if ($leading_line or ($item =~ m/\n{2,}/)) { + $item = _RunBlockGamut(_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = _DoLists(_Outdent($item)); + chomp $item; + $item = _RunSpanGamut($item); + } - "
  • " . $item . "
  • \n"; - }egmx; + "
  • " . $item . "
  • \n"; + }egmx; - $g_list_level--; - return $list_str; + $g_list_level--; + return $list_str; } sub _DoCodeBlocks { # -# Process Markdown `
    ` blocks.
    +#   Process Markdown `
    ` blocks.
     #
     
    -	my $text = shift;
    -
    -	$text =~ s{
    -			(?:\n\n|\A)
    -			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    -			  (?:
    -			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    -			    .*\n+
    -			  )+
    -			)
    -			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    -		}{
    -			my $codeblock = $1;
    -			my $result; # return value
    +    my $text = shift;
    +
    +    $text =~ s{
    +	    (?:\n\n|\A)
    +	    (		    # $1 = the code block -- one or more lines, starting with a space/tab
    +	      (?:
    +		(?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    +		.*\n+
    +	      )+
    +	    )
    +	    ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
    +	}{
    +	    my $codeblock = $1;
    +	    my $result; # return value
     
    -			$codeblock = _EncodeCode(_Outdent($codeblock));
    -			$codeblock = _Detab($codeblock);
    -			$codeblock =~ s/\A\n+//; # trim leading newlines
    -			$codeblock =~ s/\s+\z//; # trim trailing whitespace
    +	    $codeblock = _EncodeCode(_Outdent($codeblock));
    +	    $codeblock = _Detab($codeblock);
    +	    $codeblock =~ s/\A\n+//; # trim leading newlines
    +	    $codeblock =~ s/\s+\z//; # trim trailing whitespace
     
    -			$result = "\n\n
    " . $codeblock . "\n
    \n\n"; + $result = "\n\n
    " . $codeblock . "\n
    \n\n"; - $result; - }egmx; + $result; + }egmx; - return $text; + return $text; } sub _DoCodeSpans { # -# * Backtick quotes are used for spans. +# * Backtick quotes are used for spans. # -# * You can use multiple backticks as the delimiters if you want to -# include literal backticks in the code span. So, this input: +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: # -# Just type ``foo `bar` baz`` at the prompt. +# Just type ``foo `bar` baz`` at the prompt. # -# Will translate to: +# Will translate to: # -#

    Just type foo `bar` baz at the prompt.

    +#

    Just type foo `bar` baz at the prompt.

    # -# There's no arbitrary limit to the number of backticks you -# can use as delimters. If you need three consecutive backticks -# in your code, use four for delimiters, etc. +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. # -# * You can use spaces to get literal backticks at the edges: +# * You can use spaces to get literal backticks at the edges: # -# ... type `` `bar` `` ... +# ... type `` `bar` `` ... # -# Turns to: +# Turns to: # -# ... type `bar` ... +# ... type `bar` ... # - my $text = shift; - - $text =~ s@ - (`+) # $1 = Opening run of ` - (.+?) # $2 = The code block - (?$c
    "; - @egsx; - - return $text; + my $text = shift; + + $text =~ s@ + (`+) # $1 = Opening run of ` + (.+?) # $2 = The code block + (?$c
    "; + @egsx; + + return $text; } @@ -1022,157 +1022,157 @@ sub _EncodeCode { # local $_ = shift; - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - s/&/&/g; + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; - # Encode $'s, but only if we're running under Blosxom. - # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/$/g; - } + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } } - # Do the angle bracket song and dance: - s! < !<!gx; - s! > !>!gx; + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; - # Now, escape characters that are magic in Markdown: - s! \* !$g_escape_table{'*'}!gx; - s! _ !$g_escape_table{'_'}!gx; - s! ~ !$g_escape_table{'~'}!gx; - s! { !$g_escape_table{'{'}!gx; - s! } !$g_escape_table{'}'}!gx; - s! \[ !$g_escape_table{'['}!gx; - s! \] !$g_escape_table{']'}!gx; - s! \\ !$g_escape_table{'\\'}!gx; + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! ~ !$g_escape_table{'~'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; - return $_; + return $_; } sub _DoItalicsAndBoldAndStrike { - my $text = shift; + my $text = shift; - # must go first: - $text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* } - {$1}gsx; - $text =~ s{ (?$1}gsx; + # must go first: + $text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* } + {$1}gsx; + $text =~ s{ (?$1}gsx; - $text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ } - {$1}gsx; + $text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ } + {$1}gsx; - $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* } - {$1}gsx; - $text =~ s{ (?$1}gsx; + $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* } + {$1}gsx; + $text =~ s{ (?$1}gsx; - return $text; + return $text; } sub _DoBlockQuotes { - my $text = shift; - - $text =~ s{ - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) + my $text = shift; + + $text =~ s{ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = _RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /g; + # These leading spaces screw with
     content, so we need to fix that:
    +	    $bq =~ s{
    +		    (\s*
    .+?
    ) }{ - my $bq = $1; - $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting - $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines - $bq = _RunBlockGamut($bq); # recurse - - $bq =~ s/^/ /g; - # These leading spaces screw with
     content, so we need to fix that:
    -			$bq =~ s{
    -					(\s*
    .+?
    ) - }{ - my $pre = $1; - $pre =~ s/^ //mg; - $pre; - }egsx; - - "
    \n$bq\n
    \n\n"; - }egmx; - - - return $text; + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "
    \n$bq\n
    \n\n"; + }egmx; + + + return $text; } sub _FormParagraphs { # -# Params: -# $text - string to process with html

    tags +# Params: +# $text - string to process with html

    tags # - my $text = shift; - - # Strip leading and trailing lines: - $text =~ s/\A\n+//; - $text =~ s/\n+\z//; - - my @grafs = split(/\n{2,}/, $text); - - # - # Wrap

    tags. - # - foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); - s/^([ \t]*)/

    /; - $_ .= "

    "; - } + my $text = shift; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap

    tags. + # + foreach (@grafs) { + unless (defined( $g_html_blocks{$_} )) { + $_ = _RunSpanGamut($_); + s/^([ \t]*)/

    /; + $_ .= "

    "; } + } - # - # Unhashify HTML blocks - # - foreach (@grafs) { - if (defined( $g_html_blocks{$_} )) { - $_ = $g_html_blocks{$_}; - } + # + # Unhashify HTML blocks + # + foreach (@grafs) { + if (defined( $g_html_blocks{$_} )) { + $_ = $g_html_blocks{$_}; } + } - return join "\n\n", @grafs; + return join "\n\n", @grafs; } sub _EncodeAmpsAndAngles { # Smart processing for ampersands and angle brackets that need to be encoded. - my $text = shift; + my $text = shift; - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; - # Encode naked <'s - $text =~ s{<(?![a-z/?\$!])}{<}gi; + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; - return $text; + return $text; } sub _EncodeBackslashEscapes { # -# Parameter: String. -# Returns: The string, with after processing the following backslash -# escape sequences. +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. # local $_ = shift; - s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. s! \\` !$g_escape_table{'`'}!gx; s! \\\* !$g_escape_table{'*'}!gx; s! \\_ !$g_escape_table{'_'}!gx; @@ -1195,77 +1195,77 @@ sub _EncodeBackslashEscapes { sub _DoAutoLinks { - my $text = shift; - - $text =~ s{<((https?|ftp):[^'">\s]+)>}{
    $1}gi; - - # Email addresses: - $text =~ s{ - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - > - }{ - _EncodeEmailAddress( _UnescapeSpecialChars($1) ); - }egix; + my $text = shift; + + $text =~ s{<((https?|ftp):[^'">\s]+)>}{$1}gi; + + # Email addresses: + $text =~ s{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; - return $text; + return $text; } sub _EncodeEmailAddress { # -# Input: an email address, e.g. "foo@example.com" +# Input: an email address, e.g. "foo@example.com" # -# Output: the email address as a mailto link, with each character -# of the address encoded as either a decimal or hex entity, in -# the hopes of foiling most address harvesting spam bots. E.g.: +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: # -# foo -# @example.com +# foo +# @example.com # -# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -# mailing list: +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: # - my $addr = shift; - - srand; - my @encode = ( - sub { '&#' . ord(shift) . ';' }, - sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, - sub { shift }, - ); - - $addr = "mailto:" . $addr; - - $addr =~ s{(.)}{ - my $char = $1; - if ( $char eq '@' ) { - # this *must* be encoded. I insist. - $char = $encode[int rand 1]->($char); - } elsif ( $char ne ':' ) { - # leave ':' alone (to spot mailto: later) - my $r = rand; - # roughly 10% raw, 45% hex, 45% dec - $char = ( - $r > .9 ? $encode[2]->($char) : - $r < .45 ? $encode[1]->($char) : - $encode[0]->($char) - ); - } - $char; - }gex; + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; - $addr = qq{$addr}; - $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + $addr = qq{$addr}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - return $addr; + return $addr; } @@ -1273,28 +1273,28 @@ sub _UnescapeSpecialChars { # # Swap back in all the special characters we've hidden. # - my $text = shift; + my $text = shift; - while( my($char, $hash) = each(%g_escape_table) ) { - $text =~ s/$hash/$char/g; - } + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } return $text; } sub _TokenizeHTML { # -# Parameter: String containing HTML markup. -# Returns: Reference to an array of the tokens comprising the input -# string. Each token is either a tag (possibly with nested, -# tags contained therein, such as , or a -# run of text between tags. Each element of the array is a -# two-element array; the first is either 'tag' or 'text'; -# the second is the actual value. +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as , or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. # # # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -# +# # my $str = shift; @@ -1303,20 +1303,20 @@ sub _TokenizeHTML { my @tokens; my $depth = 6; - my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); my $match = qr/(?s: ) | # comment - (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags while ($str =~ m/($match)/g) { - my $whole_tag = $1; - my $sec_start = pos $str; - my $tag_start = $sec_start - length $whole_tag; - if ($pos < $tag_start) { - push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; - } - push @tokens, ['tag', $whole_tag]; - $pos = pos $str; + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; } push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; \@tokens; @@ -1327,10 +1327,10 @@ sub _Outdent { # # Remove one level of line-leading tabs or spaces # - my $text = shift; + my $text = shift; - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; - return $text; + $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + return $text; } @@ -1339,10 +1339,10 @@ sub _Detab { # Cribbed from a post by Bart Lateur: # # - my $text = shift; + my $text = shift; - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; - return $text; + $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + return $text; } @@ -1385,7 +1385,7 @@ For more information about Markdown's syntax, see: Use "--" to end switch parsing. For example, to open a file named "-z", use: - Markdown.pl -- -z + Markdown.pl -- -z =over 4