@ -14,11 +14,18 @@ use strict;
use warnings ;
use warnings ;
close ( DATA ) if fileno ( DATA ) ;
close ( DATA ) if fileno ( DATA ) ;
require Exporter ;
use Digest::MD5 qw( md5_hex ) ;
use Digest::MD5 qw( md5_hex ) ;
use vars qw( $VERSION ) ;
use File::Basename qw( basename ) ;
use vars qw( $VERSION @ISA @EXPORT_OK ) ;
@ ISA = qw( Exporter ) ;
@ EXPORT_OK = qw( Markdown ) ;
$ INC { __PACKAGE__ . '.pm' } = $ INC { basename ( __FILE__ ) } unless exists $ INC { __PACKAGE__ . '.pm' } ;
$ VERSION = '1.0.3' ;
$ VERSION = '1.0.3' ;
# Sun 06 Sep 2015
# Sun 06 Sep 2015
## Disabled; causes problems under Perl 5.6.1:
## Disabled; causes problems under Perl 5.6.1:
# use utf8;
# use utf8;
# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
@ -28,8 +35,6 @@ $VERSION = '1.0.3';
# Global default settings:
# Global default settings:
#
#
my $ g_empty_element_suffix = " />" ; # Change to ">" for HTML output
my $ g_empty_element_suffix = " />" ; # Change to ">" for HTML output
my $ g_url_prefix = "" ; # Prefixed to non-absolute URLs
my $ g_img_prefix = "" ; # Prefixed to non-absolute image URLs
my $ g_tab_width = 4 ;
my $ g_tab_width = 4 ;
@ -62,6 +67,7 @@ foreach my $char (split //, "\\\`*_{}[]()>#+-.!~") {
my % g_urls ;
my % g_urls ;
my % g_titles ;
my % g_titles ;
my % g_html_blocks ;
my % g_html_blocks ;
my % opt ;
# Used to track when we're inside an ordered or unordered list
# Used to track when we're inside an ordered or unordered list
# (see _ProcessListItems() for details):
# (see _ProcessListItems() for details):
@ -191,6 +197,7 @@ elsif (!caller) {
use warnings ;
use warnings ;
#### Check for command-line switches: #################
#### Check for command-line switches: #################
my % options = ( ) ;
my % cli_opts ;
my % cli_opts ;
use Getopt::Long ;
use Getopt::Long ;
Getopt::Long:: Configure ( 'pass_through' ) ;
Getopt::Long:: Configure ( 'pass_through' ) ;
@ -216,13 +223,13 @@ elsif (!caller) {
exit 0 ;
exit 0 ;
}
}
if ( $ cli_opts { 'html4tags' } ) { # Use HTML tag style instead of XHTML
if ( $ cli_opts { 'html4tags' } ) { # Use HTML tag style instead of XHTML
$ g_empty_element_suffix = ">" ;
$ options { empty_element_suffix } = ">" ;
}
}
if ( $ cli_opts { 'htmlroot' } ) { # Use URL prefix
if ( $ cli_opts { 'htmlroot' } ) { # Use URL prefix
$ g_url_prefix = $ cli_opts { 'htmlroot' } ;
$ options { url_prefix } = $ cli_opts { 'htmlroot' } ;
}
}
if ( $ cli_opts { 'imageroot' } ) { # Use image URL prefix
if ( $ cli_opts { 'imageroot' } ) { # Use image URL prefix
$ g_img_prefix = $ cli_opts { 'imageroot' } ;
$ options { img_prefix } = $ cli_opts { 'imageroot' } ;
}
}
@ -232,7 +239,7 @@ elsif (!caller) {
local $/ ; # Slurp the whole file
local $/ ; # Slurp the whole file
$ text = < > ;
$ text = < > ;
}
}
print Markdown ( $ text ) ;
print Markdown ( $ text , \ % options ) ;
}
}
}
}
@ -246,15 +253,35 @@ sub Markdown {
# and <img> tags get encoded.
# and <img> tags get encoded.
#
#
my $ text = shift ;
my $ text = shift ;
defined $ text or $ text = '' ;
# Any remaining arguments after the first are options; either a single
# hashref or a list of name, value paurs.
% opt = (
# set initial defaults
empty_element_suffix = > $ g_empty_element_suffix ,
tab_width = > $ g_tab_width ,
url_prefix = > "" , # Prefixed to non-absolute URLs
img_prefix = > "" , # Prefixed to non-absolute image URLs
) ;
my % args = ( ) ;
if ( ref ( $ _ [ 0 ] ) eq "HASH" ) {
% args = % { $ _ [ 0 ] } ;
} else {
% args = @ _ ;
}
while ( my ( $ k , $ v ) = each % args ) {
$ opt { $ k } = $ v ;
}
# Clear the global hashes. If we don't clear these, you get conflicts
# Clear the globals. If we don't clear these, you get conflicts
# from other articles when generating a page which contains more than
# from other articles when generating a page which contains more than
# one article (e.g. an index page that shows the N most recent
# one article (e.g. an index page that shows the N most recent
# articles):
# articles):
% g_urls = ( ) ;
% g_urls = ( ) ;
% g_titles = ( ) ;
% g_titles = ( ) ;
% g_html_blocks = ( ) ;
% g_html_blocks = ( ) ;
$ g_list_level = 0 ;
# Standardize line endings:
# Standardize line endings:
$ text =~ s{\r\n} {\n}g ; # DOS to Unix
$ text =~ s{\r\n} {\n}g ; # DOS to Unix
@ -292,7 +319,7 @@ sub _StripLinkDefinitions {
# hash references.
# hash references.
#
#
my $ text = shift ;
my $ text = shift ;
my $ less_than_tab = $ g_tab_width - 1 ;
my $ less_than_tab = $ opt { tab_width } - 1 ;
# Link defs are in the form: ^[id]: url "optional title"
# Link defs are in the form: ^[id]: url "optional title"
while ( $ text =~ s {
while ( $ text =~ s {
@ -327,7 +354,7 @@ sub _StripLinkDefinitions {
sub _HashHTMLBlocks {
sub _HashHTMLBlocks {
my $ text = shift ;
my $ text = shift ;
my $ less_than_tab = $ g_tab_width - 1 ;
my $ less_than_tab = $ opt { tab_width } - 1 ;
# Hashify HTML blocks:
# Hashify HTML blocks:
# We only want to do this for block-level HTML tags, such as headers,
# We only want to do this for block-level HTML tags, such as headers,
@ -445,9 +472,9 @@ sub _RunBlockGamut {
$ text = _DoHeaders ( $ text ) ;
$ text = _DoHeaders ( $ text ) ;
# Do Horizontal Rules:
# Do Horizontal Rules:
$ text =~ s{^[ ]{0,2} ([ ]?\*[ ]?) { 3 , } [ \ t ] * $ } { \ n < hr $ g_empty_element_suffix \ n } gmx ;
$ text =~ s{^[ ]{0,2} ([ ]?\*[ ]?) { 3 , } [ \ t ] * $ } { \ n < hr $ opt { empty_element_suffix } \ n } gmx ;
$ text =~ s{^[ ]{0,2} ([ ]? -[ ]?) { 3 , } [ \ t ] * $ } { \ n < hr $ g_empty_element_suffix \ n } gmx ;
$ text =~ s{^[ ]{0,2} ([ ]? -[ ]?) { 3 , } [ \ t ] * $ } { \ n < hr $ opt { empty_element_suffix } \ n } gmx ;
$ text =~ s{^[ ]{0,2} ([ ]? _[ ]?) { 3 , } [ \ t ] * $ } { \ n < hr $ g_empty_element_suffix \ n } gmx ;
$ text =~ s{^[ ]{0,2} ([ ]? _[ ]?) { 3 , } [ \ t ] * $ } { \ n < hr $ opt { empty_element_suffix } \ n } gmx ;
$ text = _DoLists ( $ text ) ;
$ text = _DoLists ( $ text ) ;
@ -493,7 +520,7 @@ sub _RunSpanGamut {
$ text = _DoItalicsAndBoldAndStrike ( $ text ) ;
$ text = _DoItalicsAndBoldAndStrike ( $ text ) ;
# Do hard breaks:
# Do hard breaks:
$ text =~ s/ {2,}\n/ <br$g_empty_element_suffix \n/g ;
$ text =~ s/ {2,}\n/ <br$opt{empty_element_suffix} \n/g ;
return $ text ;
return $ text ;
}
}
@ -678,7 +705,7 @@ sub _DoImages {
$ title =~ s! ~ !$g_escape_table{'~'}!gx ;
$ title =~ s! ~ !$g_escape_table{'~'}!gx ;
$ result . = " title=\"$title\"" ;
$ result . = " title=\"$title\"" ;
}
}
$ result . = $ g_empty_element_suffix ;
$ result . = $ opt { empty_element_suffix } ;
}
}
else {
else {
# If there's no such link ID, leave intact:
# If there's no such link ID, leave intact:
@ -732,7 +759,7 @@ sub _DoImages {
$ title =~ s! ~ !$g_escape_table{'~'}!gx ;
$ title =~ s! ~ !$g_escape_table{'~'}!gx ;
$ result . = " title=\"$title\"" ;
$ result . = " title=\"$title\"" ;
}
}
$ result . = $ g_empty_element_suffix ;
$ result . = $ opt { empty_element_suffix } ;
$ result ;
$ result ;
} xsge ;
} xsge ;
@ -795,7 +822,7 @@ sub _DoLists {
# Form HTML ordered (numbered) and unordered (bulleted) lists.
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
#
my $ text = shift ;
my $ text = shift ;
my $ less_than_tab = $ g_tab_width - 1 ;
my $ less_than_tab = $ opt { tab_width } - 1 ;
# Re-usable patterns to match list item bullets and number markers:
# Re-usable patterns to match list item bullets and number markers:
my $ marker_ul = qr/[*+-]/ ;
my $ marker_ul = qr/[*+-]/ ;
@ -957,11 +984,11 @@ sub _DoCodeBlocks {
( ? : \ n \ n | \ A )
( ? : \ n \ n | \ A )
( # $1 = the code block -- one or more lines, starting with a space/tab
( # $1 = the code block -- one or more lines, starting with a space/tab
( ? :
( ? :
( ? : [ ] { $ g_tab_width } | \ t ) # Lines must start with a tab or a tab-width of spaces
( ? : [ ] { $ opt { tab_width } } | \ t ) # Lines must start with a tab or a tab-width of spaces
. * \ n +
. * \ n +
) +
) +
)
)
( ( ? = ^ [ ] { 0 , $ g_tab_width } \ S ) | \ Z ) # Lookahead for non-space at line-start, or end of doc
( ( ? = ^ [ ] { 0 , $ opt { tab_width } } \ S ) | \ Z ) # Lookahead for non-space at line-start, or end of doc
} {
} {
my $ codeblock = $ 1 ;
my $ codeblock = $ 1 ;
my $ result ; # return value
my $ result ; # return value
@ -1364,7 +1391,7 @@ sub _Outdent {
#
#
my $ text = shift ;
my $ text = shift ;
$ text =~ s/^(\t|[ ]{1,$g_tab_width })//gm ;
$ text =~ s/^(\t|[ ]{1,$opt{tab_width} })//gm ;
return $ text ;
return $ text ;
}
}
@ -1376,7 +1403,7 @@ sub _Detab {
#
#
my $ text = shift ;
my $ text = shift ;
$ text =~ s{(.*?)\t} {$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))} ge ;
$ text =~ s{(.*?)\t} {$1.(' ' x ($opt{tab_width} - length ( $ 1 ) % $ opt { tab_width } ) ) } ge ;
return $ text ;
return $ text ;
}
}
@ -1387,11 +1414,11 @@ sub _PrefixURL {
#
#
my $ url = shift ;
my $ url = shift ;
return $ url unless $ g_url_prefix ne '' || $ g_img_prefix ne '' ;
return $ url unless $ opt { url_prefix } ne '' || $ opt { img_prefix } ne '' ;
return $ url if $ url =~ m , ^ // , || $ url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/ ;
return $ url if $ url =~ m , ^ // , || $ url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/ ;
my $ ans = $ g_url_prefix ;
my $ ans = $ opt { url_prefix } ;
$ ans = $ g_img_prefix
$ ans = $ opt { img_prefix }
if $ g_img_prefix ne '' && $ url =~ /\.(?:png|gif|jpe?g|svg?z)$/i ;
if $ opt { img_prefix } ne '' && $ url =~ /\.(?:png|gif|jpe?g|svg?z)$/i ;
return $ url unless $ ans ne '' ;
return $ url unless $ ans ne '' ;
$ ans . = '/' if substr ( $ ans , - 1 , 1 ) ne '/' ;
$ ans . = '/' if substr ( $ ans , - 1 , 1 ) ne '/' ;
$ ans . = substr ( $ url , 0 , 1 ) eq '/' ? substr ( $ url , 1 ) : $ url ;
$ ans . = substr ( $ url , 0 , 1 ) eq '/' ? substr ( $ url , 1 ) : $ url ;