Browse Source

Markdown.pl: introduce --tabwidth=<num> option

The --tabwidth=<num> option only affects the width to which
tabs are expanded.  It does NOT affect the number of spaces
required to start a new indent level.  That remains set at 4
no matter what value is used for the --tabwidth=<num> option.

With this change it's now, finally, possible to have proper
tab expansion without breaking the "4 spaces per indent level"
rule.

Note that backtick-delimited code blocks will always expand
their tabs to 8-character tab stop positions no matter what
value is used for the --tabwidth=<num> option.

With this change the default expansion width for tabs when
Markdown.pl is run from the command line is now 8.

When used as a module the default is still 4, but that's
easily changed by passing in a suitable option.

Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
master
Kyle J. McKay 8 years ago
parent
commit
73176737ad
  1. 57
      Markdown.pl

57
Markdown.pl

@ -29,7 +29,7 @@ All rights reserved.
require Exporter;
use Digest::MD5 qw(md5);
use File::Basename qw(basename);
use Scalar::Util qw(refaddr);
use Scalar::Util qw(refaddr looks_like_number);
use Pod::Usage;
@ISA = qw(Exporter);
@EXPORT_OK = qw(Markdown);
@ -46,9 +46,10 @@ exit(&_main(@ARGV)||0) unless caller;
#
# Global default settings:
#
my ($g_empty_element_suffix, $g_tab_width);
my ($g_empty_element_suffix, $g_indent_width, $g_tab_width);
BEGIN {
$g_empty_element_suffix = " />"; # Change to ">" for HTML output
$g_indent_width = 4; # Number of spaces considered new level
$g_tab_width = 4; # Legacy even though it's wrong
}
@ -253,6 +254,7 @@ sub _main {
'html4tags',
'htmlroot|r=s',
'imageroot|i=s',
'tabwidth|tab-width=s',
);
if ($cli_opts{'help'}) {
pod2usage(-verbose => 2, -exitval => 0);
@ -273,12 +275,19 @@ sub _main {
if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
$options{empty_element_suffix} = ">";
}
if ($cli_opts{'tabwidth'}) {
my $tw = $cli_opts{'tabwidth'};
die "invalid tab width (must be integer)\n" unless looks_like_number $tw;
die "invalid tab width (must be >= 2 and <= 32)\n" unless $tw >= 2 && $tw <= 32;
$options{tab_width} = int(0+$tw);
}
if ($cli_opts{'htmlroot'}) { # Use URL prefix
$options{url_prefix} = $cli_opts{'htmlroot'};
}
if ($cli_opts{'imageroot'}) { # Use image URL prefix
$options{img_prefix} = $cli_opts{'imageroot'};
}
$options{tab_width} = 8 unless defined($options{tab_width});
#### Process incoming text: ###########################
@ -313,6 +322,7 @@ sub Markdown {
# set initial defaults
empty_element_suffix => $g_empty_element_suffix,
tab_width => $g_tab_width,
indent_width => $g_indent_width,
url_prefix => "", # Prefixed to non-absolute URLs
img_prefix => "", # Prefixed to non-absolute image URLs
);
@ -409,11 +419,11 @@ sub _StripLinkDefinitions {
# hash references.
#
my $text = shift;
my $less_than_tab = $opt{tab_width} - 1;
my $less_than_indent = $opt{indent_width} - 1;
# Link defs are in the form: ^[id]: url "optional title"
while ($text =~ s{
^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
^[ ]{0,$less_than_indent}\[(.+)\]: # id = $1
[ \t]*
\n? # maybe *one* newline
[ \t]*
@ -449,7 +459,7 @@ BEGIN {
sub _HashHTMLBlocks {
my $text = shift;
my $less_than_tab = $opt{tab_width} - 1;
my $less_than_indent = $opt{indent_width} - 1;
# Hashify HTML blocks:
# We only want to do this for block-level HTML tags, such as headers,
@ -513,7 +523,7 @@ sub _HashHTMLBlocks {
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,$less_than_tab}
[ ]{0,$less_than_indent}
<(hr) # start tag = $2
\b # word break
([^<>])*? #
@ -535,7 +545,7 @@ sub _HashHTMLBlocks {
\A\n? # the beginning of the doc
)
( # save in $1
[ ]{0,$less_than_tab}
[ ]{0,$less_than_indent}
(?s:
<!
(--.*?--\s*)+
@ -913,13 +923,13 @@ sub _DoLists {
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
my $text = shift;
my $less_than_tab = $opt{tab_width} - 1;
my $less_than_indent = $opt{indent_width} - 1;
# Re-usable pattern to match any entire ul or ol list:
my $whole_list = qr{
( # $1 = whole list
( # $2
[ ]{0,$less_than_tab}
[ ]{0,$less_than_indent}
(${marker_any}) # $3 = first list item marker
[ \t]+
)
@ -1068,19 +1078,18 @@ sub _DoCodeBlocks {
$text =~ s{
(?:\n\n|\A)
( # $1 = the code block -- one or more lines, starting with a space/tab
( # $1 = the code block -- one or more lines, starting with indent_width spaces
(?:
(?:[ ]{$opt{tab_width}} | \t) # Lines must start with a tab or a tab-width of spaces
(?:[ ]{$opt{indent_width}}) # Lines must start with indent_width of spaces
.*\n+
)+
)
((?=^[ ]{0,$opt{tab_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
((?=^[ ]{0,$opt{indent_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
}{
my $codeblock = $1;
my $result; # return value
$codeblock = _EncodeCode(_Outdent($codeblock));
$codeblock = _Detab($codeblock);
$codeblock =~ s/\A\n+//; # trim leading newlines
$codeblock =~ s/\s+\z//; # trim trailing whitespace
@ -1430,11 +1439,11 @@ sub _TokenizeHTML {
sub _Outdent {
#
# Remove one level of line-leading tabs or spaces
# Remove one level of line-leading indent (up to indent_width spaces)
#
my $text = shift;
$text =~ s/^(\t|[ ]{1,$opt{tab_width}})//gm;
$text =~ s/^ {1,$opt{indent_width}}//gm;
return $text;
}
@ -1483,13 +1492,14 @@ Markdown.pl - convert Markdown format text files to HTML
=head1 SYNOPSIS
B<Markdown.pl> [B<--help>] [B<--html4tags>] [B<--htmlroot>=I<prefix>]
[B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>] [--]
[I<file>...]
[B<--imageroot>=I<prefix>] [B<--version>] [B<--shortversion>]
[B<--tabwidth>=I<num>] [--] [I<file>...]
Options:
-h show short usage help
--help show long detailed help
--html4tags use <br> instead of <br />
--tabwidth=num expand tabs to num instead of 8
-r prefix | --htmlroot=prefix append relative non-img URLs
to prefix
-i prefix | --imageroot=prefix append relative img URLs to
@ -1536,6 +1546,19 @@ instead of Markdown's default XHTML style tags, e.g.:
<br />
=item B<--tabwidth>=I<num>
Expand tabs to I<num> character wide tab stop positions instead of the default
8. Don't use this; physical tabs should always be expanded to 8-character
positions. This option does I<not> affect the number of spaces needed to
start a new "indent level". That will always be 4 no matter what value is
used (or implied by default) with this option. Also note that tabs inside
backtick-delimited code blocks will always be expanded to 8-character tab
stop positions no matter what value is used for this option.
The value must be S<2 <= I<num> <= 32>.
=item B<-r> I<prefix>, B<--htmlroot>=I<prefix>
Any non-absolute URLs have I<prefix> prepended.

Loading…
Cancel
Save