s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - - # First, look for nested blocks, e.g.: - #
s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; + my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + + # First, look for nested blocks, e.g.: + #
tags around block-level tags.
+ $text = _HashHTMLBlocks($text);
- $text = _FormParagraphs($text);
+ $text = _FormParagraphs($text);
- return $text;
+ return $text;
}
@@ -462,60 +462,60 @@ sub _RunSpanGamut {
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
- my $text = shift;
+ my $text = shift;
- $text = _DoCodeSpans($text);
+ $text = _DoCodeSpans($text);
- $text = _EscapeSpecialChars($text);
+ $text = _EscapeSpecialChars($text);
- # Process anchor and image tags. Images must come first,
- # because ![foo][f] looks like an anchor.
- $text = _DoImages($text);
- $text = _DoAnchors($text);
+ # Process anchor and image tags. Images must come first,
+ # because ![foo][f] looks like an anchor.
+ $text = _DoImages($text);
+ $text = _DoAnchors($text);
- # Make links out of things like ` Just type Just type tags
+# Params:
+# $text - string to process with html tags
#
- my $text = shift;
-
- # Strip leading and trailing lines:
- $text =~ s/\A\n+//;
- $text =~ s/\n+\z//;
-
- my @grafs = split(/\n{2,}/, $text);
-
- #
- # Wrap tags.
- #
- foreach (@grafs) {
- unless (defined( $g_html_blocks{$_} )) {
- $_ = _RunSpanGamut($_);
- s/^([ \t]*)/ /;
- $_ .= " tags.
+ #
+ foreach (@grafs) {
+ unless (defined( $g_html_blocks{$_} )) {
+ $_ = _RunSpanGamut($_);
+ s/^([ \t]*)/ /;
+ $_ .= "
or tags.
-# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
-
- foreach my $cur_token (@$tokens) {
- if ($cur_token->[0] eq "tag") {
- # Within tags, encode *, _ and ~ so they don't conflict
- # with their use in Markdown for italics and strong.
- # We're replacing each such character with its
- # corresponding MD5 checksum value; this is likely
- # overkill, but it should prevent us from colliding
- # with the escape values by accident.
- $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
- $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
- $cur_token->[1] =~ s! ~ !$g_escape_table{'~'}!gx;
- $text .= $cur_token->[1];
- } else {
- my $t = $cur_token->[1];
- $t = _EncodeBackslashEscapes($t);
- $text .= $t;
- }
+ my $text = shift;
+ my $tokens ||= _TokenizeHTML($text);
+
+ $text = ''; # rebuild $text from the tokens
+# my $in_pre = 0; # Keep track of when we're inside
or
tags.
+# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
+
+ foreach my $cur_token (@$tokens) {
+ if ($cur_token->[0] eq "tag") {
+ # Within tags, encode *, _ and ~ so they don't conflict
+ # with their use in Markdown for italics and strong.
+ # We're replacing each such character with its
+ # corresponding MD5 checksum value; this is likely
+ # overkill, but it should prevent us from colliding
+ # with the escape values by accident.
+ $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
+ $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
+ $cur_token->[1] =~ s! ~ !$g_escape_table{'~'}!gx;
+ $text .= $cur_token->[1];
+ } else {
+ my $t = $cur_token->[1];
+ $t = _EncodeBackslashEscapes($t);
+ $text .= $t;
}
- return $text;
+ }
+ return $text;
}
@@ -523,100 +523,100 @@ sub _DoAnchors {
#
# Turn Markdown link shortcuts into XHTML tags.
#
- my $text = shift;
-
- #
- # First, handle reference-style links: [link text] [id]
- #
- $text =~ s{
- ( # wrap whole match in $1
- \[
- ($g_nested_brackets) # link text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
- )
- }{
- my $result;
- my $whole_match = $1;
- my $link_text = $2;
- my $link_id = lc $3;
-
- if ($link_id eq "") {
- $link_id = lc $link_text; # for shortcut links like [this][].
- }
-
- if (defined $g_urls{$link_id}) {
- my $url = $g_urls{$link_id};
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics, bold
- $url =~ s! ~ !$g_escape_table{'~'}!gx; # and strike through.
- $result = "? # href = $3
- [ \t]*
- ( # $4
- (['"]) # quote char = $5
- (.*?) # Title = $6
- \5 # matching quote
- )? # title is optional
- \)
- )
- }{
- my $result;
- my $whole_match = $1;
- my $link_text = $2;
- my $url = $3;
- my $title = $6;
-
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics, bold
- $url =~ s! ~ !$g_escape_table{'~'}!gx; # and strike through.
- $result = "? # href = $3
+ [ \t]*
+ ( # $4
+ (['"]) # quote char = $5
+ (.*?) # Title = $6
+ \5 # matching quote
+ )? # title is optional
+ \)
+ )
+ }{
+ my $result;
+ my $whole_match = $1;
+ my $link_text = $2;
+ my $url = $3;
+ my $title = $6;
+
+ $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
+ $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics, bold
+ $url =~ s! ~ !$g_escape_table{'~'}!gx; # and strike through.
+ $result = " tags.
#
- my $text = shift;
-
- #
- # First, handle reference-style labeled images: ![alt text][id]
- #
- $text =~ s{
- ( # wrap whole match in $1
- !\[
- (.*?) # alt text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
-
- )
- }{
- my $result;
- my $whole_match = $1;
- my $alt_text = $2;
- my $link_id = lc $3;
-
- if ($link_id eq "") {
- $link_id = lc $alt_text; # for shortcut links like ![this][].
- }
-
- $alt_text =~ s/"/"/g;
- if (defined $g_urls{$link_id}) {
- my $url = $g_urls{$link_id};
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics, bold
- $url =~ s! ~ !$g_escape_table{'~'}!gx; # and strike through.
- $result = "? # src url = $3
- [ \t]*
- ( # $4
- (['"]) # quote char = $5
- (.*?) # title = $6
- \5 # matching quote
- [ \t]*
- )? # title is optional
- \)
- )
- }{
- my $result;
- my $whole_match = $1;
- my $alt_text = $2;
- my $url = $3;
- my $title = '';
- if (defined($6)) {
- $title = $6;
- }
+ $result;
+ }xsge;
+
+ #
+ # Next, handle inline images: ![alt text](url "optional title")
+ # Don't forget: encode * and _
+
+ $text =~ s{
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+ \( # literal paren
+ [ \t]*
+ (\S+?)>? # src url = $3
+ [ \t]*
+ ( # $4
+ (['"]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ \t]*
+ )? # title is optional
+ \)
+ )
+ }{
+ my $result;
+ my $whole_match = $1;
+ my $alt_text = $2;
+ my $url = $3;
+ my $title = '';
+ if (defined($6)) {
+ $title = $6;
+ }
- $alt_text =~ s/"/"/g;
- $title =~ s/"/"/g;
- $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
- $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics, bold
- $url =~ s! ~ !$g_escape_table{'~'}!gx; # and strike through.
- $result = "" . _RunSpanGamut($1) . "\n\n";
- }egmx;
-
- $text =~ s{ ^(?:-+[ \t]*\n)?(.+)[ \t]*\n-+[ \t]*\n+ }{
- "
" . _RunSpanGamut($1) . "
\n\n";
- }egmx;
-
- $text =~ s{ ^(?:~+[ \t]*\n)?(.+)[ \t]*\n~+[ \t]*\n+ }{
- "" . _RunSpanGamut($1) . "
\n\n";
+ my $text = shift;
+
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ #
+ # Header 3
+ # ~~~~~~~~
+ #
+ $text =~ s{ ^(?:=+[ \t]*\n)?(.+)[ \t]*\n=+[ \t]*\n+ }{
+ "" . _RunSpanGamut($1) . "
\n\n";
+ }egmx;
+
+ $text =~ s{ ^(?:-+[ \t]*\n)?(.+)[ \t]*\n-+[ \t]*\n+ }{
+ "" . _RunSpanGamut($1) . "
\n\n";
+ }egmx;
+
+ $text =~ s{ ^(?:~+[ \t]*\n)?(.+)[ \t]*\n~+[ \t]*\n+ }{
+ "" . _RunSpanGamut($1) . "
\n\n";
+ }egmx;
+
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ #
+ $text =~ s{
+ ^(\#{1,6}) # $1 = string of #'s
+ [ \t]*
+ (.+?) # $2 = Header text
+ [ \t]*
+ \#* # optional closing #'s (not counted)
+ \n+
+ }{
+ my $h_level = length($1);
+ "` blocks.
+# Process Markdown `
";
+ @egsx;
+
+ return $text;
}
@@ -1022,157 +1022,157 @@ sub _EncodeCode {
#
local $_ = shift;
- # Encode all ampersands; HTML entities are not
- # entities within a Markdown code span.
- s/&/&/g;
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ s/&/&/g;
- # Encode $'s, but only if we're running under Blosxom.
- # (Blosxom interpolates Perl variables in article bodies.)
- {
- no warnings 'once';
- if (defined($blosxom::version)) {
- s/\$/$/g;
- }
+ # Encode $'s, but only if we're running under Blosxom.
+ # (Blosxom interpolates Perl variables in article bodies.)
+ {
+ no warnings 'once';
+ if (defined($blosxom::version)) {
+ s/\$/$/g;
+ }
}
- # Do the angle bracket song and dance:
- s! < !<!gx;
- s! > !>!gx;
+ # Do the angle bracket song and dance:
+ s! < !<!gx;
+ s! > !>!gx;
- # Now, escape characters that are magic in Markdown:
- s! \* !$g_escape_table{'*'}!gx;
- s! _ !$g_escape_table{'_'}!gx;
- s! ~ !$g_escape_table{'~'}!gx;
- s! { !$g_escape_table{'{'}!gx;
- s! } !$g_escape_table{'}'}!gx;
- s! \[ !$g_escape_table{'['}!gx;
- s! \] !$g_escape_table{']'}!gx;
- s! \\ !$g_escape_table{'\\'}!gx;
+ # Now, escape characters that are magic in Markdown:
+ s! \* !$g_escape_table{'*'}!gx;
+ s! _ !$g_escape_table{'_'}!gx;
+ s! ~ !$g_escape_table{'~'}!gx;
+ s! { !$g_escape_table{'{'}!gx;
+ s! } !$g_escape_table{'}'}!gx;
+ s! \[ !$g_escape_table{'['}!gx;
+ s! \] !$g_escape_table{']'}!gx;
+ s! \\ !$g_escape_table{'\\'}!gx;
- return $_;
+ return $_;
}
sub _DoItalicsAndBoldAndStrike {
- my $text = shift;
+ my $text = shift;
- # must go first:
- $text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }
- {$1}gsx;
- $text =~ s{ (?$1}gsx;
+ # must go first:
+ $text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }
+ {$1}gsx;
+ $text =~ s{ (?$1}gsx;
- $text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }
- {` blocks.
#
- my $text = shift;
-
- $text =~ s{
- (?:\n\n|\A)
- ( # $1 = the code block -- one or more lines, starting with a space/tab
- (?:
- (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
- .*\n+
- )+
- )
- ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
- }{
- my $codeblock = $1;
- my $result; # return value
+ my $text = shift;
+
+ $text =~ s{
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }{
+ my $codeblock = $1;
+ my $result; # return value
- $codeblock = _EncodeCode(_Outdent($codeblock));
- $codeblock = _Detab($codeblock);
- $codeblock =~ s/\A\n+//; # trim leading newlines
- $codeblock =~ s/\s+\z//; # trim trailing whitespace
+ $codeblock = _EncodeCode(_Outdent($codeblock));
+ $codeblock = _Detab($codeblock);
+ $codeblock =~ s/\A\n+//; # trim leading newlines
+ $codeblock =~ s/\s+\z//; # trim trailing whitespace
- $result = "\n\n
";
- @egsx;
-
- return $text;
+ my $text = shift;
+
+ $text =~ s@
+ (`+) # $1 = Opening run of `
+ (.+?) # $2 = The code block
+ (?$c
\n\n";
+ $result = "\n\n" . $codeblock . "\n
\n\n";
- $result;
- }egmx;
+ $result;
+ }egmx;
- return $text;
+ return $text;
}
sub _DoCodeSpans {
#
-# * Backtick quotes are used for " . $codeblock . "\n
spans.
+# * Backtick quotes are used for
spans.
#
-# * You can use multiple backticks as the delimiters if you want to
-# include literal backticks in the code span. So, this input:
+# * You can use multiple backticks as the delimiters if you want to
+# include literal backticks in the code span. So, this input:
#
-# Just type ``foo `bar` baz`` at the prompt.
+# Just type ``foo `bar` baz`` at the prompt.
#
-# Will translate to:
+# Will translate to:
#
-#
foo `bar` baz
at the prompt.foo `bar` baz
at the prompt.`bar`
...
+# ... type `bar`
...
#
- my $text = shift;
-
- $text =~ s@
- (`+) # $1 = Opening run of `
- (.+?) # $2 = The code block
- (?$c$1}gsx;
+ $text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }
+ {$1}gsx;
- $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
- {$1}gsx;
- $text =~ s{ (?$1}gsx;
+ $text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
+ {$1}gsx;
+ $text =~ s{ (?$1}gsx;
- return $text;
+ return $text;
}
sub _DoBlockQuotes {
- my $text = shift;
-
- $text =~ s{
- ( # Wrap whole match in $1
- (
- ^[ \t]*>[ \t]? # '>' at the start of a line
- .+\n # rest of the first line
- (.+\n)* # subsequent consecutive lines
- \n* # blanks
- )+
- )
+ my $text = shift;
+
+ $text =~ s{
+ ( # Wrap whole match in $1
+ (
+ ^[ \t]*>[ \t]? # '>' at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )
+ }{
+ my $bq = $1;
+ $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
+ $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
+ $bq = _RunBlockGamut($bq); # recurse
+
+ $bq =~ s/^/ /g;
+ # These leading spaces screw with content, so we need to fix that:
+ $bq =~ s{
+ (\s*
.+?
)
}{
- my $bq = $1;
- $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
- $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
- $bq = _RunBlockGamut($bq); # recurse
-
- $bq =~ s/^/ /g;
- # These leading spaces screw with content, so we need to fix that:
- $bq =~ s{
- (\s*
.+?
)
- }{
- my $pre = $1;
- $pre =~ s/^ //mg;
- $pre;
- }egsx;
-
- "\n$bq\n
\n\n";
- }egmx;
-
-
- return $text;
+ my $pre = $1;
+ $pre =~ s/^ //mg;
+ $pre;
+ }egsx;
+
+ "\n$bq\n
\n\n";
+ }egmx;
+
+
+ return $text;
}
sub _FormParagraphs {
#
-# Params:
-# $text - string to process with html