#!/usr/bin/env perl
#
# Markdown -- A text-to-HTML conversion tool for web writers
#
# Copyright (C) 2004 John Gruber
# Copyright (C) 2015,2016,2017,2018,2019,2020,2021 Kyle J. McKay
# All rights reserved.
# License is Modified BSD (aka 3-clause BSD) License\n";
# See LICENSE file (or "; # _FormParagraphs open paragraph tag
$g_close_p = "
"; $g_close_p = "
"; } defined($o->{empty_element_suffix}) && ($o->{empty_element_suffix} eq " />" || $o->{empty_element_suffix} eq ">") or $o->{empty_element_suffix} = " />"; $o->{tab_width} = 8 unless looks_like_number($o->{tab_width}) && 1 <= $o->{tab_width} && $o->{tab_width} <= 32; $o->{tab_width} = int($o->{tab_width}); $o->{indent_width} = 4 unless looks_like_number($o->{indent_width}) && 1 <= $o->{indent_width} && $o->{indent_width} <= 32; $o->{indent_width} = int($o->{indent_width}); defined($o->{auto_number}) or $o->{auto_number} = ''; $o->{auto_number} eq '' || looks_like_number($o->{auto_number}) or $o->{auto_number} = 6; if ($o->{auto_number} ne '') { $o->{auto_number} = int(0+$o->{auto_number}); $o->{auto_number} >= 0 or $o->{auto_number} = 0; $o->{auto_number} <= 6 or $o->{auto_number} = 6; } defined($o->{style_prefix}) or $o->{style_prefix} = $g_style_prefix; $o->{abs_prefix} = _MakePrefixCODERef($o->{abs_prefix}, 1) unless ref($o->{abs_prefix}) eq 'CODE'; $o->{url_prefix} = _MakePrefixCODERef($o->{url_prefix}, 0) unless ref($o->{url_prefix}) eq 'CODE'; $o->{img_prefix} = _MakePrefixCODERef($o->{img_prefix}, 0) unless ref($o->{img_prefix}) eq 'CODE'; $o->{base_prefix} = _MakePrefixCODERef($o->{base_prefix}, -1) unless ref($o->{base_prefix}) eq 'CODE'; ref($o->{wikiopt}) eq "HASH" or $o->{wikiopt} = {}; # Note that because Markdown makes a copy of the options # before calling this function, this does not actually remove # any "h1" key that might have been set by the caller of # the Markdown function. However, by deleting it here, # this guarantees that any found value will actually be # picked up and stored (which will not happen if the key # already exists). delete $o->{h1}; # Default is to silently strip any known YAML front matter # Same comment about "yaml" key as above for "h1" key $o->{yamlmode} = 1 unless looks_like_number($o->{yamlmode}); $o->{yamlvis} = -1 unless looks_like_number($o->{yamlvis}); delete $o->{yaml}; # The anchors hash will only be returned if the key exists # (the key's value doesn't matter), set the value to an empty # HASH ref just in case to make sure it's always a HASH ref. $o->{anchors} = {} if exists($o->{anchors}); } my %_yamlopts; BEGIN {%_yamlopts = map({$_ => 1} qw( display_metadata header_enum title ))} sub _HasUnknownYAMLOptions { do { return 1 unless exists($_yamlopts{$_}) } foreach keys(%{$_[0]}); return 0; } sub _ApplyYAMLOpts { my ($yaml, $opt) = @_; if (defined($yaml->{display_metadata}) && $opt->{yamlvis} < 0) { # ignore display_metadata except in --yaml=enable mode $opt->{yamlvis} = _YAMLTrueValue($yaml->{display_metadata}) ? 1 : 0; } $opt->{h1} = $yaml->{title} if defined($yaml->{title}); if (defined($yaml->{header_enum}) && $opt->{auto_number} eq '') { $opt->{auto_number} = _YAMLTrueValue($yaml->{header_enum}) ? 6 : 0; } } sub _YAMLTrueValue { my $v = shift; defined($v) or $v = ""; $v = lc($v); return !($v eq "" || $v eq "0" || $v eq "false" || $v eq "disable" || $v eq "off" || $v eq "no"); } # Actually returns an empty string rather than a CODE ref # if an empty prefix is passed in. Trailing "/"s are trimmed # off if the second argument is positive or the string does NOT # consist of only "/"s. A trailing "/" is added unless the # trimmed prefix already has one or the second argument is true. # If the second argument is negative, the prefix is used as-is. sub _MakePrefixCODERef { my ($prefix, $mtok) = @_; defined($prefix) or $prefix = ""; looks_like_number($mtok) or $mtok = $mtok ? 1 : 0; if ($mtok > 0) { $prefix =~ s,/+$,,; } elsif (!$mtok) { $prefix =~ s,//+$,/,; } $prefix ne "" or return ""; $prefix .= '/' if !$mtok && substr($prefix, -1, 1) ne '/'; return sub { $prefix . $_[0] }; } sub Markdown { # # Primary function. The order in which other subs are called here is # essential. Link and image substitutions need to happen before # _EscapeSpecialChars(), so that any *'s or _'s in the # and tags get encoded. # my $text = shift; # Any remaining arguments after the first are options; either a single # hashref or a list of name, value pairs. See _SanitizeOpts comments. %opt = ( # set initial defaults style_prefix => $g_style_prefix, empty_element_suffix => $g_empty_element_suffix, tab_width => _tabDefault, indent_width => $g_indent_width, abs_prefix => "", # Prefixed to absolute path URLs url_prefix => "", # Prefixed to non-absolute URLs img_prefix => "", # Prefixed to non-absolute image URLs base_prefix => "", # Prefixed to fragment-only URLs ); @autonum = (); my %args = (); if (ref($_[0]) eq "HASH") { %args = %{$_[0]}; } else { %args = @_; } while (my ($k,$v) = each %args) { $opt{$k} = $v; } _SanitizeOpts(\%opt); my $yaml; ($text, $yaml) = _PrepareInput($text, $opt{yamlmode}); _ApplyYAMLOpts($yaml, \%opt) if ref($yaml) eq "HASH" && $opt{yamlmode} > 0; my $yamltable = ""; if (ref($yaml) eq "HASH" && %$yaml && $opt{yamlmode} && $opt{yamlvis}) { if ($opt{yamlvis} > 0 || _HasUnknownYAMLOptions($yaml)) { my ($hrows, $drows) = ("", ""); foreach (sort(keys(%$yaml))) { my $v = $yaml->{$_}; my $rspn = ''; if (defined($v)) { $v =~ s/&/&/g; $v =~ s/</g; utf8::encode($v); $drows .= ""
. $codeblock . "\n
"
. $codeblock . "\n
s around # "paragraphs" that are wrapped in non-block-level tags, such as anchors, # phrase emphasis, and spans. The list of tags we're looking for is # hard-coded: # First, look for nested blocks, e.g.: #
tags.
# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
foreach my $cur_token (@$tokens) {
if ($cur_token->[0] eq "tag") {
# Within tags, encode *, _ and ~ so they don't conflict
# with their use in Markdown for italics and strong.
# We're replacing each such character with its
# corresponding block id value; this is likely
# overkill, but it should prevent us from colliding
# with the escape values by accident.
$cur_token->[1] =~ s!([*_~])!$g_escape_table{$1}!g;
$text .= $cur_token->[1];
} else {
my $t = $cur_token->[1];
$t = _EncodeBackslashEscapes($t);
$text .= $t;
}
}
return $text;
}
sub _ProcessWikiLink {
my ($link_text, $link_loc) = @_;
if (defined($link_loc) &&
($link_loc =~ m{^#\S*$} || $link_loc =~ m{^(?:http|ftp)s?://\S+$}i)) {
# Return the new link
return _MakeATag(_FindFragmentMatch($link_loc), $link_text);
}
if (!defined($link_loc)) {
$link_loc = _RunSpanGamut($link_text);
$link_loc = _strip(unescapeXML(_StripTags(_UnescapeSpecialChars($link_loc))));
$link_loc =~ m{^(?:http|ftp)s?://\S+$}i and
# Return the new link
return _MakeATag($link_loc, $link_text);
}
return undef if $link_loc eq "" || $link_text eq "";
if ($link_loc =~ /^[A-Za-z][A-Za-z0-9+.-]*:/os) {
# Unrecognized scheme
return undef;
}
if ($opt{wikipat}) {
my $o = $opt{wikiopt};
my $img_link = _strip($link_text);
my $img = 0;
my $qsfrag = "";
my $base;
my $imgopts = undef;
if ($img_link =~ /^[^#?\s]+\.(?:png|gif|jpe?g|svgz?)$/i) {
$base = _wxform($img_link, 1);
$img = 1;
$imgopts = _ParseWikiImgOpts($link_loc);
$imgopts->{imgflag} = \$img;
} else {
$base = $link_loc;
if ($link_loc =~ /^(.*?)([?#].*)$/os) {
($base, $qsfrag) = ($1, $2);
}
$base = _wxform($base);
}
my $result = $opt{wikipat};
$result =~ s/%\{\}.+$/%{}/os if $img;
$result =~ s/%\{\}/$base/;
if ($qsfrag =~ /^([^#]*)(#.+)$/os) {
my ($q,$f) = ($1,$2);
#$f = _wxform($f) if $f =~ / /;
$qsfrag = $q . $f;
}
$result .= $qsfrag;
$result = &{$opt{wikifunc}}($result, \%opt, ($img?$img_link:$link_loc), $base, $qsfrag, $imgopts)
if ref($opt{wikifunc}) eq 'CODE';
{
use bytes;
$result =~ s/%(?![0-9A-Fa-f]{2})/%25/sog;
if ($o->{r}) {
$result =~
s/([\x00-\x1F <>"{}|\\^`x7F])/sprintf("%%%02X",ord($1))/soge;
} else {
$result =~
s/([\x00-\x1F <>"{}|\\^`\x7F-\xFF])/sprintf("%%%02X",ord($1))/soge;
}
$result =~ s/(%(?![0-9A-F]{2})[0-9A-Fa-f]{2})/uc($1)/soge;
}
# Return the new link
return $img ? _MakeIMGTag($result, undef, undef, $imgopts) : _MakeATag($result, $link_text);
}
# leave it alone
return undef;
}
sub _ParseWikiImgOpts {
my $alts = shift;
my %o = ();
# alt= consumes the rest of the line, do it first
if ($alts =~ /(?:^|,)\s*alt\s*=\s*(.*)$/ios) {
my $atext = $1;
$alts = substr($alts, 0, $-[0]);
$o{alt} = _strip($atext);
}
foreach my $kv (split(/\s*,\s*/, lc($alts))) {
if ($kv =~ /^\s*([^\s]+)\s*=\s*([^\s]+)\s*$/os) {
my ($k, $v) = ($1, $2);
if (($k eq "width" || $k eq "height") && $v =~ /^\d+$/) {
$o{$k} = 0+$v if $v > 0;
next;
}
if ($k eq "align" && ($v eq "left" || $v eq "right" || $v eq "center")) {
$o{$k} = $v;
next;
}
}
}
return \%o;
}
sub _wxform {
my ($w, $img) = @_;
my $o = $opt{wikiopt};
my $opt_s = $o->{s};
if (!$img && $opt_s) {
if (ref($opt_s)) {
if ($w =~ m{^(.*)[.]([^./]*)$}) {
my ($base, $ext) = ($1, $2);
$w = $base if $opt_s->{lc($ext)};
}
} else {
$w =~ s{[.][^./]*$}{};
}
}
$w = uc($w) if $o->{u};
$w = lc($w) if $o->{l};
$w =~ s{/+}{%252F}gos if $o->{"%"};
$w =~ s/ +/%20/gos if $o->{b};
$w =~ tr{/}{ } if $o->{f};
$w =~ s{/+}{/}gos if !$o->{f} && !$o->{v};
if ($o->{d}) {
$w =~ tr{ }{-};
$w =~ s/-+/-/gos unless $o->{v};
} else {
$w =~ tr{ }{_};
$w =~ s/_+/_/gos unless $o->{v};
}
return $w;
}
# Return a suitably encoded tag string
# On input NONE of $url, $text or $title should be xmlencoded
# but $url should already be url-encoded if needed, but NOT g_escape_table'd
sub _MakeATag {
my ($url, $text, $title) = @_;
defined($url) or $url="";
defined($text) or $text="";
defined($title) or $title="";
$url =~ m"^#" && ref($opt{base_prefix}) eq 'CODE' and $url = &{$opt{base_prefix}}($url);
my $result = $g_escape_table{'<'}."a href=\"" . _EncodeAttText($url) . "\"";
$title = _strip($title);
$text =~ s{<(/?a)}{<$1}sogi;
$text = _DoItalicsAndBoldAndStrike($text);
# We've got to encode any of these remaining to avoid
# conflicting with other italics, bold and strike through and links.
$text =~ s!([]*_~[])!$g_escape_table{$1}!g;
$result .= " title=\"" . _EncodeAttText($title) . "\"" if $title ne "";
return $result . $g_escape_table{'>'} .
$text . $g_escape_table{'<'}."/a".$g_escape_table{'>'};
}
sub _DoAnchors {
#
# Turn Markdown link shortcuts into XHTML tags.
#
my $text = shift;
#
# First, handle wiki-style links: [[wiki style link]]
#
$text =~ s{
( # wrap whole match in $1
\[\[
($g_nested_brackets) # link text and id = $2
\]\]
)
}{
my $result;
my $whole_match = $1;
my $link_text = $2;
my $link_loc = undef;
if ($link_text =~ /^(.*)\|(.*)$/s) {
$link_text = $1;
$link_loc = _strip($2);
}
$result = _ProcessWikiLink($link_text, $link_loc);
defined($result) or $result = $whole_match;
$result;
}xsge;
#
# Next, handle reference-style links: [link text] [id]
#
$text =~ s{
( # wrap whole match in $1
\[
($g_nested_brackets) # link text = $2
\]
[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces
\[
($g_nested_brackets) # id = $3
\]
)
}{
my $result;
my $whole_match = $1;
my $link_text = $2;
my $link_id = $3;
if ($link_id eq "") {
# for shortcut links like [this][].
$link_id = _RunSpanGamut($link_text);
$link_id = unescapeXML(_StripTags(_UnescapeSpecialChars($link_id)));
}
$link_id = _strip(lc $link_id);
if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
my $url = $g_urls{$link_id};
defined($url) or $url = $g_anchors{$link_id};
$url = _FindFragmentMatch($url);
$link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
$result = _MakeATag(_PrefixURL($url), $link_text, $g_titles{$link_id});
}
else {
$result = $whole_match;
}
$result;
}xsge;
#
# Subsequently, inline-style links: [link text](url "optional title")
#
$text =~ s{
( # wrap whole match in $1
\[
($g_nested_brackets) # link text = $2
\]
\( # literal paren
($g_nested_parens) # href and optional title = $3
\)
)
}{
#my $result;
my $whole_match = $1;
my $link_text = $2;
my ($url, $title) = _SplitUrlTitlePart($3);
if (defined($url)) {
$url = _FindFragmentMatch($url);
$link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
_MakeATag(_PrefixURL($url), $link_text, $title);
} else {
# The href/title part didn't match the pattern
$whole_match;
}
}xsge;
#
# Finally, handle reference-style implicit shortcut links: [link text]
#
$text =~ s{
( # wrap whole match in $1
\[
($g_nested_brackets) # link text = $2
\]
)
}{
my $result;
my $whole_match = $1;
my $link_text = $2;
my $link_id = _RunSpanGamut($2);
$link_id = _strip(lc(unescapeXML(_StripTags(_UnescapeSpecialChars($link_id)))));
if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
my $url = $g_urls{$link_id};
defined($url) or $url = $g_anchors{$link_id};
$url = _FindFragmentMatch($url);
$link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
$result = _MakeATag(_PrefixURL($url), $link_text, $g_titles{$link_id});
}
else {
$result = $whole_match;
}
$result;
}xsge;
return $text;
}
sub _PeelWrapped {
defined($_[0]) or return undef;
if (substr($_[0],0,1) eq "(") {
return substr($_[0], 1, length($_[0]) - (substr($_[0], -1, 1) eq ")" ? 2 : 1));
}
return $_[0];
}
sub _SplitUrlTitlePart {
return ("", undef) if $_[0] =~ m{^\s*$}; # explicitly allowed
my $u = $_[0];
$u =~ s/^\s*(['\042])/# $1/;
if ($u =~ m{
^ # match beginning
\s*?
([^\s'\042]\S*?)>? # URL = $1
(?: # optional grouping
\s+ # must be distinct from URL
(['\042]?) # quote char = $2
(.*?) # Title = $3
\2? # matching quote
)? # title is optional
\s*
\z # match end
}osx) {
return (undef, undef) if $_[1] && ($1 eq "" || $1 eq "#");
return (_PeelWrapped($1), $2 ? $3 : _PeelWrapped($3));
} else {
return (undef, undef);
}
}
sub _FindFragmentMatchInternal {
my ($anchors_id, $url, $undefifnomatch) = @_;
if (defined($url) && $url =~ /^#\S/) {
# try very hard to find a match
my $idbase = _strip(lc(substr($url, 1)));
my $idbase0 = $idbase;
my $id = _MakeAnchorId($idbase);
$undefifnomatch and $url = undef;
if (defined($$anchors_id{$id})) {
$url = $$anchors_id{$id};
} else {
$idbase =~ s/-/_/gs;
$id = _MakeAnchorId($idbase);
if (defined($$anchors_id{$id})) {
$url = $$anchors_id{$id};
} else {
$id = _MakeAnchorId($idbase0, 1);
if (defined($$anchors_id{$id})) {
$url = $$anchors_id{$id};
} else {
$id = _MakeAnchorId($idbase, 1);
if (defined($$anchors_id{$id})) {
$url = $$anchors_id{$id};
}
}
}
}
}
return $url;
}
sub _FindFragmentMatch {
return _FindFragmentMatchInternal(\%g_anchors_id, @_);
}
sub _ToUTF8 {
my $input = shift;
my $output;
if (Encode::is_utf8($input) || utf8::decode($input)) {
$output = $input;
} else {
$output = $encoder->decode($input, Encode::FB_DEFAULT);
}
return $output;
}
# $_[0] -> HASH ref of anchors (e.g. the "anchors" OUTPUT from Markdown)
# $_[1] -> fragment to resolve, may optionally start with '#'
# An empty string ("") or hash ("#") is returned as-is.
# returns undef if no match otherwise resolved fragment name
# which will start with a '#' if $_[1] started with '#' otherwise will not.
# This function can be used to connect up links to "implicit" anchors.
# All Markdown-format H1-H6 headers have an implicit anchor added
# based on the header item text. Passing that text to this function
# will cough up the matching implicit anchor if there is one.
sub ResolveFragment
{
my ($anchors, $frag) = @_;
defined($frag) or return undef;
$frag eq "" || $frag eq "#" and return $frag;
my $hadhash = ($frag =~ s/^#//);
$frag =~ /^\S/ or return undef;
ref($anchors) eq 'HASH' or return undef;
my $ans = _FindFragmentMatchInternal($anchors, '#'._ToUTF8($frag), 1);
$hadhash || !defined($ans) or $ans =~ s/^#//;
defined($ans) and utf8::encode($ans);
return $ans;
}
# Return a suitably encoded tag string
# On input NONE of $url, $alt or $title should be xmlencoded
# but $url should already be url-encoded if needed, but NOT g_escape_table'd
sub _MakeIMGTag {
my ($url, $alt, $title, $iopts) = @_;
defined($url) or $url="";
defined($alt) or $alt="";
defined($title) or $title="";
ref($iopts) eq "HASH" or $iopts = {};
return "" unless $url ne "";
my ($w, $h, $lf, $rt) = (0, 0, '', '');
($alt, $title) = (_strip($alt), _strip($title));
if ($title =~ /^(.*)\(()([1-9][0-9]*)[xX\xd7]([1-9][0-9]*)(>?)\)$/os) {
($title, $w, $h, $lf, $rt) = (_strip($1), $3, $4, $2, $5);
} elsif ($title =~ /^(.*)\(()\?[xX\xd7]([1-9][0-9]*)(>?)\)$/os) {
($title, $h, $lf, $rt) = (_strip($1), $3, $2, $4);
} elsif ($title =~ /^(.*)\(()([1-9][0-9]*)[xX\xd7]\?(>?)\)$/os) {
($title, $w, $lf, $rt) = (_strip($1), $3, $2, $4);
} elsif ($title =~ /^(.*)\((?!\))()(>?)\)$/os) {
($title, $lf, $rt) = (_strip($1), $2, $3);
}
$iopts->{align} = "center" if $lf && $rt;
$iopts->{align} = "left" if $lf && !$rt;
$iopts->{align} = "right" if !$lf && $rt;
$iopts->{width} = $w if $w != 0;
$iopts->{height} = $h if $h != 0;
$iopts->{alt} = $alt if $alt ne "";
$iopts->{title} = $title if $title ne "";
my $iopt = sub { defined($iopts->{$_[0]}) ? $iopts->{$_[0]} : (@_ > 1 ? $_[1] : "") };
my $result = '';
$result .= $g_escape_table{'<'}."center".$g_escape_table{'>'}
if &$iopt("align") eq "center";
$result .= $g_escape_table{'<'}."img src=\"" . _EncodeAttText($url) . "\"";
$result .= " align=\"left\"" if &$iopt("align") eq "left";
$result .= " align=\"right\"" if &$iopt("align") eq "right";
$result .= " alt=\"" . _EncodeAttText($iopts->{alt}) . "\"" if &$iopt("alt") ne "";
$result .= " width=\"" . $iopts->{width} . "\"" if &$iopt("width",0) != 0;
$result .= " height=\"" . $iopts->{height} . "\"" if &$iopt("height",0) != 0;
$result .= " title=\"" . _EncodeAttText($iopts->{title}) . "\"" if &$iopt("title") ne "";
$result .= " /" unless $opt{empty_element_suffix} eq ">";
$result .= $g_escape_table{'>'};
$result .= $g_escape_table{'<'}."/center".$g_escape_table{'>'}
if &$iopt("align") eq "center";
return $result;
}
sub _DoImages {
#
# Turn Markdown image shortcuts into tags.
#
my $text = shift;
#
# First, handle reference-style labeled images: ![alt text][id]
#
$text =~ s{
( # wrap whole match in $1
!\[
($g_nested_brackets) # alt text = $2
\]
[ ]? # one optional space
(?:\n[ ]*)? # one optional newline followed by spaces
\[
($g_nested_brackets) # id = $3
\]
)
}{
my $result;
my $whole_match = $1;
my $alt_text = $2;
my $link_id = $3;
$link_id ne "" or $link_id = $alt_text; # for shortcut links like ![this][].
$link_id = _strip(lc $link_id);
if (defined $g_urls{$link_id}) {
$result = _MakeIMGTag(
_PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
}
else {
# If there's no such link ID, leave intact:
$result = $whole_match;
}
$result;
}xsge;
#
# Next, handle inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
$text =~ s{
( # wrap whole match in $1
!\[
($g_nested_brackets) # alt text = $2
\]
\( # literal paren
($g_nested_parens) # src and optional title = $3
\)
)
}{
my $whole_match = $1;
my $alt_text = $2;
my ($url, $title) = _SplitUrlTitlePart($3, 1);
defined($url) ? _MakeIMGTag(_PrefixURL($url), $alt_text, $title) : $whole_match;
}xsge;
#
# Finally, handle reference-style implicitly labeled links: ![alt text]
#
$text =~ s{
( # wrap whole match in $1
!\[
($g_nested_brackets) # alt text = $2
\]
)
}{
my $result;
my $whole_match = $1;
my $alt_text = $2;
my $link_id = lc(_strip($alt_text));
if (defined $g_urls{$link_id}) {
$result = _MakeIMGTag(
_PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
}
else {
# If there's no such link ID, leave intact:
$result = $whole_match;
}
$result;
}xsge;
return $text;
}
sub _EncodeAttText {
my $text = shift;
defined($text) or return undef;
$text = escapeXML(_strip($text));
# We've got to encode these to avoid conflicting
# with italics, bold and strike through.
$text =~ s!([*_~:])!$g_escape_table{$1}!g;
return $text;
}
sub _MakeAnchorId {
use bytes;
my ($link, $strip) = @_;
$link = lc($link);
if ($strip) {
$link =~ s/\s+/_/gs;
$link =~ tr/-a-z0-9_//cd;
} else {
$link =~ tr/-a-z0-9_/_/cs;
}
return '' unless $link ne '';
$link = "_".$link."_";
$link =~ s/__+/_/gs;
$link = "_".md5_hex($link)."_" if length($link) > 66;
return $link;
}
sub _GetNewAnchorId {
my $link = _strip(lc(shift));
return '' if $link eq "" || defined($g_anchors{$link});
my $id = _MakeAnchorId($link);
return '' unless $id;
$g_anchors{$link} = '#'.$id;
$g_anchors_id{$id} = $g_anchors{$link};
if ($id =~ /-/) {
my $id2 = $id;
$id2 =~ s/-/_/gs;
$id2 =~ s/__+/_/gs;
defined($g_anchors_id{$id2}) or $g_anchors_id{$id2} = $g_anchors{$link};
}
my $idd = _MakeAnchorId($link, 1);
if ($idd) {
defined($g_anchors_id{$idd}) or $g_anchors_id{$idd} = $g_anchors{$link};
if ($idd =~ /-/) {
my $idd2 = $idd;
$idd2 =~ s/-/_/gs;
$idd2 =~ s/__+/_/gs;
defined($g_anchors_id{$idd2}) or $g_anchors_id{$idd2} = $g_anchors{$link};
}
}
$id;
}
sub _DoHeaders {
my ($text, $anchors) = @_;
my $h1;
my $geth1 = $anchors && !defined($opt{h1}) ? sub {
return unless !defined($h1);
my $h = shift;
$h1 = $h if $h ne "";
} : sub {};
# atx-style headers:
# # Header 1
# ## Header 2
# ## Header 2 with closing hashes ##
# ...
# ###### Header 6
#
$text =~ s{
^(\#{1,6}) # $1 = string of #'s
[ ]*
((?:(?:(?" . _AutoHeaderFlag($h_level) . $rsg . "\n\n";
}egmx;
# Setext-style headers:
# Header 1
# ========
#
# Header 2
# --------
#
# Header 3
# ~~~~~~~~
#
$text =~ s{ ^(?:=+[ ]*\n)?[ ]*(.+?)[ ]*\n=+[ ]*\n+ }{
my $h = $1;
my $rsg = _RunSpanGamut($h);
$h = _strip(unescapeXML(_StripTags(_UnescapeSpecialChars($rsg))));
my $id = $h eq "" ? "" : _GetNewAnchorId($h);
$id = " id=\"$id\"" if $id ne "";
&$geth1($h);
"" . _AutoHeaderFlag(1) . $rsg . "
\n\n";
}egmx;
$text =~ s{ ^(?:-+[ ]*\n)?[ ]*(.+?)[ ]*\n-+[ ]*\n+ }{
my $h = $1;
my $rsg = _RunSpanGamut($h);
$h = _strip(unescapeXML(_StripTags(_UnescapeSpecialChars($rsg))));
my $id = $h eq "" ? "" : _GetNewAnchorId($h);
$id = " id=\"$id\"" if $id ne "";
"" . _AutoHeaderFlag(2) . $rsg . "
\n\n";
}egmx;
$text =~ s{ ^(?:~+[ ]*\n)?[ ]*(.+?)[ ]*\n~+[ ]*\n+ }{
my $h = $1;
my $rsg = _RunSpanGamut($h);
$h = _strip(unescapeXML(_StripTags(_UnescapeSpecialChars($rsg))));
my $id = $h eq "" ? "" : _GetNewAnchorId($h);
$id = " id=\"$id\"" if $id ne "";
"" . _AutoHeaderFlag(3) . $rsg . "
\n\n";
}egmx;
$opt{h1} = $h1 if defined($h1) && $h1 ne "";
return $text;
}
sub _AutoHeaderFlag {
my $level = shift;
my $auto = $opt{auto_number} || 0;
return '' unless 1 <= $level && $level <= $auto;
return "\34".chr(0x30+$level);
}
sub _AutoHeaderNum {
my $level = shift;
my $auto = $opt{auto_number} || 0;
return '' unless 1 <= $level && $level <= $auto;
pop(@autonum) while @autonum > $level;
push(@autonum, 1) while @autonum < $level - 1;
$autonum[$level - 1] += 1;
return join('.', @autonum).' ';
}
my ($marker_ul, $marker_ol, $marker_any, $roman_numeral, $greek_lower);
BEGIN {
# Re-usable patterns to match list item bullets and number markers:
$roman_numeral = qr/(?:
[IiVvXx]|[Ii]{2,3}|[Ii][VvXx]|[VvXx][Ii]{1,3}|[Xx][Vv][Ii]{0,3}|
[Xx][Ii][VvXx]|[Xx]{2}[Ii]{0,3}|[Xx]{2}[Ii]?[Vv]|[Xx]{2}[Vv][Ii]{1,2})/ox;
$greek_lower = qr/(?:[\x{03b1}-\x{03c9}])/o;
$marker_ul = qr/[*+-]/o;
$marker_ol = qr/(?:\d+|[A-Za-z]|$roman_numeral|$greek_lower)[.\)]/o;
$marker_any = qr/(?:$marker_ul|$marker_ol)/o;
}
sub _GetListMarkerType {
my ($list_type, $list_marker, $last_marker) = @_;
return "" unless $list_type && $list_marker && lc($list_type) eq "ol";
my $last_marker_type = '';
$last_marker_type = _GetListMarkerType($list_type, $last_marker)
if defined($last_marker) &&
# these are roman unless $last_marker type case matches and is 'a' or 'A'
$list_marker =~ /^[IiVvXx][.\)]?$/;
return "I" if $list_marker =~ /^[IVX]/ && $last_marker_type ne 'A';
return "i" if $list_marker =~ /^[ivx]/ && $last_marker_type ne 'a';
return "A" if $list_marker =~ /^[A-Z]/;
return "a" if $list_marker =~ /^[a-z]/ || $list_marker =~ /^$greek_lower/o;
return "1";
}
sub _GetListItemTypeClass {
my ($list_type, $list_marker, $last_marker) = @_;
my $list_marker_type = _GetListMarkerType($list_type, $list_marker, $last_marker);
my $ans = &{sub{
return "" unless length($list_marker) >= 2 && $list_marker_type =~ /^[IiAa1]$/;
return "lower-greek" if $list_marker_type eq "a" && $list_marker =~ /^$greek_lower/o;
return "" unless $list_marker =~ /\)$/;
return "upper-roman" if $list_marker_type eq "I";
return "lower-roman" if $list_marker_type eq "i";
return "upper-alpha" if $list_marker_type eq "A";
return "lower-alpha" if $list_marker_type eq "a";
return "decimal";
}};
return ($list_marker_type, $ans);
}
my %_roman_number_table;
BEGIN {
%_roman_number_table = (
i => 1,
ii => 2,
iii => 3,
iv => 4,
v => 5,
vi => 6,
vii => 7,
viii => 8,
ix => 9,
x => 10,
xi => 11,
xii => 12,
xiii => 13,
xiv => 14,
xv => 15,
xvi => 16,
xvii => 17,
xviii => 18,
xix => 19,
xx => 20,
xxi => 21,
xxii => 22,
xxiii => 23,
xxiv => 24,
xxv => 25,
xxvi => 26,
xxvii => 27
);
}
# Necessary because ς and σ are the same value grrr
my %_greek_number_table;
BEGIN {
%_greek_number_table = (
"\x{03b1}" => 1, # α
"\x{03b2}" => 2, # β
"\x{03b3}" => 3, # γ
"\x{03b4}" => 4, # δ
"\x{03b5}" => 5, # ε
"\x{03b6}" => 6, # ζ
"\x{03b7}" => 7, # η
"\x{03b8}" => 8, # θ
"\x{03b9}" => 9, # ι
"\x{03ba}" => 10, # κ
"\x{03bb}" => 11, # λ
#"\x{00b5}"=> 12, # µ is "micro" not "mu"
"\x{03bc}" => 12, # μ
"\x{03bd}" => 13, # ν
"\x{03be}" => 14, # ξ
"\x{03bf}" => 15, # ο
"\x{03c0}" => 16, # π
"\x{03c1}" => 17, # ρ
"\x{03c2}" => 18, # ς
"\x{03c3}" => 18, # σ
"\x{03c4}" => 19, # τ
"\x{03c5}" => 20, # υ
"\x{03c6}" => 21, # φ
"\x{03c7}" => 22, # χ
"\x{03c8}" => 23, # ψ
"\x{03c9}" => 24 # ω
);
}
sub _GetMarkerIntegerNum {
my ($list_marker_type, $marker_val) = @_;
my $ans = &{sub{
return 0 + $marker_val if $list_marker_type eq "1";
$list_marker_type = lc($list_marker_type);
return $_greek_number_table{$marker_val}
if $list_marker_type eq "a" &&
defined($_greek_number_table{$marker_val});
$marker_val = lc($marker_val);
return ord($marker_val) - ord("a") + 1 if $list_marker_type eq "a";
return 1 unless $list_marker_type eq "i";
defined($_roman_number_table{$marker_val}) and
return $_roman_number_table{$marker_val};
return 1;
}};
return $ans if $ans == 0 && $list_marker_type eq "1";
return $ans >= 1 ? $ans : 1;
}
sub _IncrList {
my ($from, $to, $extra) = @_;
$extra = defined($extra) ? " $extra" : "";
my $result = "";
while ($from + 10 <= $to) {
$result .= "\n";
$from += 10;
}
while ($from + 5 <= $to) {
$result .= "\n";
$from += 5;
}
while ($from + 2 <= $to) {
$result .= "\n";
$from += 2;
}
while ($from < $to) {
$result .= "\n";
++$from;
}
return $result;
}
sub _DoListsAndBlocks {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
my $text = shift;
my $indent = $opt{indent_width};
my $less_than_indent = $indent - 1;
my $less_than_double_indent = 2 * $indent - 1;
# Re-usable pattern to match any entire ul or ol list:
my $whole_list = qr{
( # $1 (or $_[0]) = whole list
( # $2 (or $_[1])
(?:(?<=\n)|\A)
[ ]{0,$less_than_indent}
(${marker_any}) # $3 (or $_[2]) = first list item marker
[ ]+
)
(?s:.+?)
( # $4 (or $_[3])
\z
|
\n{2,}
(?=\S)
(?! # Negative lookahead for another list item marker
${marker_any}[ ]
)
)
)
}mx;
my $list_item_sub = sub {
my $list = $_[0];
my $list_type = ($_[2] =~ m/$marker_ul/) ? "ul" : "ol";
my $list_att = "";
my $list_class = "";
my $list_incr = "";
# Turn double returns into triple returns, so that we can make a
# paragraph for the last item in a list, if necessary:
$list =~ s/\n\n/\n\n\n/g;
my ($result, $first_marker, $fancy) = _ProcessListItems($list_type, $list);
defined($first_marker) or return $list;
my $list_marker_type = _GetListMarkerType($list_type, $first_marker);
if ($list_marker_type) {
$first_marker =~ s/[.\)]$//;
my $first_marker_num = _GetMarkerIntegerNum($list_marker_type, $first_marker);
$list_att = $list_marker_type eq "1" ? "" : " type=\"$list_marker_type\"";
if ($fancy) {
$list_class = " class=\"$opt{style_prefix}ol\"";
my $start = $first_marker_num;
$start = 10 if $start > 10;
$start = 5 if $start > 5 && $start < 10;
$start = 1 if $start > 1 && $start < 5;
$list_att .= " start=\"$start\"" unless $start == 1;
$list_incr = _IncrList($start, $first_marker_num);
} else {
$list_class = " class=\"$opt{style_prefix}lc-greek\""
if $list_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
$list_att .= " start=\"$first_marker_num\"" unless $first_marker_num == 1;
}
}
my $idt = "\027" x $g_list_level;
$result = "$idt<$list_type$list_att$list_class>\n$list_incr" . $result . "$idt$list_type>\n\n";
$result;
};
# We use a different prefix before nested lists than top-level lists.
# See extended comment in _ProcessListItems().
#
# Note: (jg) There's a bit of duplication here. My original implementation
# created a scalar regex pattern as the conditional result of the test on
# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
# substitution once, using the scalar as the pattern. This worked,
# everywhere except when running under MT on my hosting account at Pair
# Networks. There, this caused all rebuilds to be killed by the reaper (or
# perhaps they crashed, but that seems incredibly unlikely given that the
# same script on the same server ran fine *except* under MT. I've spent
# more time trying to figure out why this is happening than I'd like to
# admit. My only guess, backed up by the fact that this workaround works,
# is that Perl optimizes the substition when it can figure out that the
# pattern will never change, and when this optimization isn't on, we run
# afoul of the reaper. Thus, the slightly redundant code to that uses two
# static s/// patterns rather than one conditional pattern.
#
# Note: (kjm) With the addition of the two-of-the-same-kind-in-a-row-
# starts-a-list-at-the-top-level rule the two patterns really are somewhat
# different now, but the duplication has pretty much been eliminated via
# use of a separate sub which has the side-effect of making the below
# two cases much easier to grok all at once.
if ($g_list_level) {
my $parse = $text;
$text = "";
pos($parse) = 0;
while ($parse =~ /\G(?s:.)*?^$whole_list/gmc) {
my @captures = ($1, $2, $3, $4);
if ($-[1] > $-[0]) {
$text .= _DoListBlocks(substr($parse, $-[0], $-[1] - $-[0]));
}
$text .= &$list_item_sub(@captures);
}
$text .= _DoListBlocks(substr($parse, pos($parse))) if pos($parse) < length($parse);
}
else {
my $parse = $text;
$text = "";
pos($parse) = 0;
while ($parse =~ m{\G(?s:.)*?
(?: (?<=\n\n) |
\A\n? |
(?<=:\n) |
(?:(?<=\n) # a list starts with one unordered marker line
(?=[ ]{0,$less_than_indent}$marker_ul[ ])) |
(?:(?<=\n) # or two ordered marker lines in a row
(?=[ ]{0,$less_than_indent}$marker_ol[ ].*\n\n?
[ ]{0,$less_than_indent}$marker_ol[ ])) |
(?:(?<=\n) # or any marker and a sublist marker
(?=[ ]{0,$less_than_indent}$marker_any[ ].*\n\n?
[ ]{$indent,$less_than_double_indent}$marker_any[ ]))
)
$whole_list
}gmcx) {
my @captures = ($1, $2, $3, $4);
if ($-[1] > $-[0]) {
$text .= _DoListBlocks(substr($parse, $-[0], $-[1] - $-[0]));
}
$text .= &$list_item_sub(@captures);
}
$text .= _DoListBlocks(substr($parse, pos($parse))) if pos($parse) < length($parse);
}
return $text;
}
sub _ProcessListItems {
#
# Process the contents of a single ordered or unordered list, splitting it
# into individual list items.
#
my $list_type = shift;
my $list_str = shift;
# The $g_list_level global keeps track of when we're inside a list.
# Each time we enter a list, we increment it; when we leave a list,
# we decrement. If it's zero, we're not in a list anymore.
#
# We do this because when we're not inside a list, we want to treat
# something like this:
#
# I recommend upgrading to version
# 8. Oops, now this line is treated
# as a sub-list.
#
# As a single paragraph, despite the fact that the second line starts
# with a digit-period-space sequence.
#
# Whereas when we're inside a list (or sub-list), that line will be
# treated as the start of a sub-list. What a kludge, huh? This is
# an aspect of Markdown's syntax that's hard to parse perfectly
# without resorting to mind-reading. Perhaps the solution is to
# change the syntax rules such that sub-lists must start with a
# starting cardinal number; e.g. "1." or "a.".
$g_list_level++;
my $idt = "\027" x $g_list_level;
my $marker_kind = $list_type eq "ul" ? $marker_ul : $marker_ol;
my $first_marker;
my $first_marker_type;
my $first_marker_num;
my $last_marker;
my $fancy;
my $skipped;
my $typechanged;
my $next_num = 1;
# trim trailing blank lines:
$list_str =~ s/\n{2,}\z/\n/;
my $result = "";
my $oldpos = 0;
pos($list_str) = 0;
while ($list_str =~ m{\G # start where we left off
(\n+)? # leading line = $1
(^[ ]*) # leading whitespace = $2
($marker_any) [ ] ([ ]*) # list marker = $3 leading item space = $4
}cgmx) {
my $leading_line = $1;
my $leading_space = $2;
my $list_marker = $3;
my $list_marker_len = length($list_marker);
my $leading_item_space = $4;
if ($-[0] > $oldpos) {
$result .= substr($list_str, $oldpos, $-[0] - $oldpos); # Sort-of $`
$oldpos = $-[0]; # point at start of this entire match
}
if (!defined($first_marker)) {
$first_marker = $list_marker;
$first_marker_type = _GetListMarkerType($list_type, $first_marker);
if ($first_marker_type) {
(my $marker_val = $first_marker) =~ s/[.\)]$//;
$first_marker_num = _GetMarkerIntegerNum($first_marker_type, $marker_val);
$next_num = $first_marker_num;
$skipped = 1 if $next_num != 1;
}
} elsif ($list_marker !~ /$marker_kind/) {
# Wrong marker kind, "fix up" the marker to a correct "lazy" marker
# But keep the old length in $list_marker_len
$list_marker = $last_marker;
}
# Now grab the rest of this item's data upto but excluding the next
# list marker at the SAME indent level, but sublists must be INCLUDED
my $item = "";
while ($list_str =~ m{\G
((?:.+?)(?:\n{1,2})) # list item text = $1
(?= \n* (?: \z | # end of string OR
(^[ ]*) # leading whitespace = $2
($marker_any) # next list marker = $3
([ ]+) )) # one or more spaces after marker = $4
}cgmxs) {
# If $3 has a left edge that is at the left edge of the previous
# marker OR $3 has a right edge that is at the right edge of the
# previous marker then we stop; otherwise we go on
$item .= substr($list_str, $-[0], $+[0] - $-[0]); # $&
last if !defined($4) || length($2) == length($leading_space) ||
length($2) + length($3) == length($leading_space) + $list_marker_len;
# move along, you're not the marker droid we're looking for...
$item .= substr($list_str, $+[0], $+[4] - $+[0]);
pos($list_str) = $+[4]; # ...move along over the marker droid
}
# Remember where we parked
$oldpos = pos($list_str);
# Process the $list_marker $item
my $liatt = '';
my $checkbox = '';
my $incr = '';
if ($list_type eq "ul" && !$leading_item_space && $item =~ /^\[([ xX\xd7])\] +(.*)$/s) {
my $checkmark = $1;
$item = $2;
my ($checkbox_class, $checkbox_val);
if ($checkmark ne " ") {
($checkbox_class, $checkbox_val) = ("checkbox-on", "x");
} else {
($checkbox_class, $checkbox_val) = ("checkbox-off", " ");
}
$liatt = " class=\"$opt{style_prefix}$checkbox_class\"";
$checkbox = "[$checkbox_val] ";
} else {
my $list_marker_type;
($list_marker_type, $liatt) = _GetListItemTypeClass($list_type, $list_marker, $last_marker);
if ($list_type eq "ol" && defined($first_marker)) {
my $styled = $fancy = 1 if $liatt && $list_marker =~ /\)$/;
my ($sfx, $dash) = ("", "");
($sfx, $dash) = ("li", "-") if $styled;
if ($liatt =~ /lower/) {
$sfx .= "${dash}lc";
} elsif ($liatt =~ /upper/) {
$sfx .= "${dash}uc";
}
$sfx .= "-greek" if $liatt =~ /greek/;
$liatt = " class=\"$opt{style_prefix}$sfx\"" if $sfx;
$typechanged = 1 if $list_marker_type ne $first_marker_type;
(my $marker_val = $list_marker) =~ s/[.\)]$//;
my $marker_num = _GetMarkerIntegerNum($list_marker_type, $marker_val);
$marker_num = $next_num if $marker_num < $next_num;
$skipped = 1 if $next_num < $marker_num;
$incr = _IncrList($next_num, $marker_num, "incrlevel=$g_list_level");
$liatt = " value=\"$marker_num\"$liatt" if $fancy || $skipped;
$liatt = " type=\"$list_marker_type\"$liatt" if $styled || $typechanged;
$next_num = $marker_num + 1;
}
}
$last_marker = $list_marker;
if ($leading_line or ($item =~ m/\n{2,}/)) {
$item = _RunBlockGamut(_Outdent($item));
$item =~ s{([OUou][Ll]>)\s*\z}{$1} and $item .= "\n$idt ";
}
else {
# Recursion for sub-lists:
$item = _DoListsAndBlocks(_Outdent($item));
chomp $item;
$item = _RunSpanGamut($item);
}
# Append to $result
$result .= "$incr$idt" . $checkbox . $item . "$idt \n";
}
if ($fancy) {
# remove "incrlevel=$g_list_level " parts
$result =~ s{}
{$idt}g;
} else {
# remove the $g_list_level incr spans entirely
$result =~ s{\n}{}g;
# remove the class="$opt{style_prefix}lc-greek" if first_marker is greek
$result =~ s{(]*?) class="$opt{style_prefix}lc-greek">}{$1>}g
if defined($first_marker_type) && $first_marker_type eq "a" && $first_marker =~ /^$greek_lower/o;
}
# Anything left over (similar to $') goes into result, but this should always be empty
$result .= _RunBlockGamut(substr($list_str, pos($list_str))) if pos($list_str) < length($list_str);
$g_list_level--;
# After all that, if we only got an ordered list with a single item
# and its first marker is a four-digit number >= 1492 and <= 2999
# or an UPPERCASE letter, then pretend we didn't see any list at all.
if ($first_marker_type && $first_marker_num + 1 == $next_num) {
if (($first_marker_type eq "1" && $first_marker_num >= 1492 && $first_marker_num <= 2999) ||
($first_marker_type eq "A" && !$fancy)) {
return (undef, undef, undef);
}
}
return ($result, $first_marker, $fancy);
}
sub _DoCodeBlocks {
#
# Process Markdown `` blocks.
#
my $text = shift;
$text =~ s{
(?:\n\n|\A\n?)
( # $1 = the code block -- one or more lines, starting with indent_width spaces
(?:
(?:[ ]{$opt{indent_width}}) # Lines must start with indent_width of spaces
.*\n+
)+
)
((?=^[ ]{0,$opt{indent_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
}{
my $codeblock = $1;
$codeblock =~ s/\n\n\n/\n\n/g; # undo "paragraph for last list item" change
$codeblock = _EncodeCode(_Outdent($codeblock));
$codeblock =~ s/\A\n+//; # trim leading newlines
$codeblock =~ s/\s+\z//; # trim trailing whitespace
my $result = ""
. $codeblock . "\n
";
my $key = block_id($result, 2);
$g_code_blocks{$key} = $result;
"\n\n" . $key . "\n\n";
}egmx;
return $text;
}
sub _DoCodeSpans {
#
# * Backtick quotes are used for
spans.
#
# * You can use multiple backticks as the delimiters if you want to
# include literal backticks in the code span. So, this input:
#
# Just type ``foo `bar` baz`` at the prompt.
#
# Will translate to:
#
# Just type foo `bar` baz
at the prompt.
#
# There's no arbitrary limit to the number of backticks you
# can use as delimters. If you need three consecutive backticks
# in your code, use four for delimiters, etc.
#
# * You can use spaces to get literal backticks at the edges:
#
# ... type `` `bar` `` ...
#
# Turns to:
#
# ... type `bar`
...
#
my $text = shift;
$text =~ s@
(`+) # $1 = Opening run of `
(.+?) # $2 = The code block
(?$c
";
@egsx;
return $text;
}
sub _EncodeCode {
#
# Encode/escape certain characters inside Markdown code runs.
# The point is that in code, these characters are literals,
# and lose their special Markdown meanings.
#
local $_ = shift;
# Encode all ampersands; HTML entities are not
# entities within a Markdown code span.
s/&/&/g;
# Encode $'s, but only if we're running under Blosxom.
# (Blosxom interpolates Perl variables in article bodies.)
s/\$/$/g if $_haveBX;
# Do the angle bracket song and dance:
s! < !<!gx;
s! > !>!gx;
# Now, escape characters that are magic in Markdown:
s!([*_~{}\[\]\\])!$g_escape_table{$1}!g;
return $_;
}
sub _DoItalicsAndBoldAndStrike {
my $text = shift;
my $doital1 = sub {
my $text = shift;
$text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
{$1}gsx;
# We've got to encode any of these remaining to
# avoid conflicting with other italics and bold.
$text =~ s!([*])!$g_escape_table{$1}!g;
$text;
};
my $doital2 = sub {
my $text = shift;
$text =~ s{ (?$1}gsx;
# We've got to encode any of these remaining to
# avoid conflicting with other italics and bold.
$text =~ s!([_])!$g_escape_table{$1}!g;
$text;
};
# must go first:
$text =~ s{ \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }
{"".&$doital1($1).""}gsex;
$text =~ s{ (?".&$doital2($1).""}gsex;
$text =~ s{ ~~ (?=\S) (.+?[*_]*) (?<=\S) ~~ }
{$1}gsx;
$text =~ s{ \* (?=\S) (.+?) (?<=\S) \* }
{$1}gsx;
$text =~ s{ (?$1}gsx;
return $text;
}
sub _DoBlockQuotes {
my $text = shift;
$text =~ s{
( # Wrap whole match in $1
(
^[ ]*>[ ]? # '>' at the start of a line
.*\n # rest of the first line
(.+\n)* # subsequent consecutive lines
\n* # blanks
)+
)
}{
my $bq = $1;
$bq =~ s/^[ ]*>[ ]?//gm; # trim one level of quoting
$bq =~ s/^[ ]+$//mg; # trim whitespace-only lines
$bq = _RunBlockGamut($bq); # recurse
$bq =~ s/^/\027/mg;
"\n$bq\n
\n\n";
}egmx;
return $text;
}
my ($LEAD, $TRAIL, $LEADBAR, $LEADSP, $COLPL, $SEP);
BEGIN {
$LEAD = qr/(?>[ ]*(?:\|[ ]*)?)/o;
$TRAIL = qr/[ ]*(?[ ]*\|[ ]*)/o;
$LEADSP = qr/(?>[ ]*)/o;
$COLPL = qr/(?:[^\n|\\]|\\(?:(?>[^\n])|(?=\n|$)))+/o;
$SEP = qr/[ ]*:?-+:?[ ]*/o;
}
sub _DoTables {
my $text = shift;
$text =~ s{
( # Wrap whole thing to avoid $&
(?: (?<=\n\n) | \A\n? ) # Preceded by blank line or beginning of string
^( # Header line
$LEADBAR \| [^\n]* |
$LEADBAR $COLPL [^\n]* |
$LEADSP $COLPL \| [^\n]*
)\n
( # Separator line
$LEADBAR $SEP (?: \| $SEP )* (?: \| [ ]*)? |
$SEP (?: \| $SEP )+ (?: \| [ ]*)? |
$SEP \| [ ]*
)\n
((?: # Rows (0+)
$LEADBAR \| [^\n]* \n |
$LEADBAR $COLPL [^\n]* \n |
$LEADSP $COLPL \| [^\n]* \n
)*)
)
} {
my ($w, $h, $s, $rows) = ($1, $2, $3, $4);
my @heads = _SplitTableRow($h);
my @seps = _SplitTableRow($s);
if (@heads == @seps) {
my @align = map {
if (/^:-+:$/) {" align=\"center\""}
elsif (/^:/) {" align=\"left\""}
elsif (/:$/) {" align=\"right\""}
else {""}
} @seps;
my $nohdr = "";
$nohdr = " $opt{style_prefix}table-nohdr" if join("", @heads) eq "";
my $tab ="\n
\n";
$tab .=
" " . _MakeTableRow("th", \@align, @heads) . " \n"
unless $nohdr;
my $cnt = 0;
my @classes = ("class=\"$opt{style_prefix}row-even\"", "class=\"$opt{style_prefix}row-odd\"");
$tab .= " " . _MakeTableRow("td", \@align, @$_) . " \n"
foreach (_SplitMergeRows($rows));
$tab .= "
\n\n";
} else {
$w;
}
}egmx;
return $text;
}
sub _SplitMergeRows {
my @rows = ();
my ($mergeprev, $mergenext) = (0,0);
foreach (split(/\n/, $_[0])) {
$mergeprev = $mergenext;
$mergenext = 0;
my @cols = _SplitTableRow($_);
if (_endswithbareslash($cols[$#cols])) {
my $last = $cols[$#cols];
substr($last, -1, 1) = "";
$last =~ s/[ ]+$//;
$cols[$#cols] = $last;
$mergenext = 1;
}
if ($mergeprev) {
for (my $i = 0; $i <= $#cols; ++$i) {
my $cell = $rows[$#rows]->[$i];
defined($cell) or $cell = "";
$rows[$#rows]->[$i] = _MergeCells($cell, $cols[$i]);
}
} else {
push(@rows, [@cols]);
}
}
return @rows;
}
sub _endswithbareslash {
return 0 unless substr($_[0], -1, 1) eq "\\";
my @parts = split(/\\\\/, $_[0], -1);
return substr($parts[$#parts], -1, 1) eq "\\";
}
sub _MergeCells {
my ($c1, $c2) = @_;
return $c1 if $c2 eq "";
return $c2 if $c1 eq "";
return $c1 . " " . $c2;
}
sub _SplitTableRow {
my $row = shift;
$row =~ s/^$LEAD//;
$row =~ s/$TRAIL$//;
$row =~ s!\\\\!$g_escape_table{'\\'}!go; # Must process escaped backslashes first.
$row =~ s!\\\|!$g_escape_table{'|'}!go; # Then do \|
my @elems = map {
s!$g_escape_table{'|'}!|!go;
s!$g_escape_table{'\\'}!\\\\!go;
s/^[ ]+//;
s/[ ]+$//;
$_;
} split(/[ ]*\|[ ]*/, $row, -1);
@elems or push(@elems, "");
return @elems;
}
sub _MakeTableRow {
my $etype = shift;
my $align = shift;
my $row = "";
for (my $i = 0; $i < @$align; ++$i) {
my $data = $_[$i];
defined($data) or $data = "";
$row .= "<" . $etype . $$align[$i] . ">" .
_RunSpanGamut($data) . "" . $etype . ">";
}
return $row;
}
sub _FormParagraphs {
#
# Params:
# $text - string to process with html tags
#
my ($text, $anchors) = @_;
# Strip leading and trailing lines:
$text =~ s/\A\n+//;
$text =~ s/\n+\z//;
my @grafs = split(/\n{2,}/, $text);
#
# Wrap
s{
<
(?:mailto:)?
(
[-.\w]+
\@
[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
)
>
}{
_EncodeEmailAddress(_UnescapeSpecialChars($1), "<", ">");
}egix;
# (kjm) I don't do "x" patterns
s{(?:^|(?<=\s))((?:https?|ftps?)://(?:[-a-zA-Z0-9./?\&\%=_~!*;:\@+\$,\x23](?:(?foo
# @example.com
#
# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
# mailing list:
#
my ($addr, $prefix, $suffix) = @_;
$prefix = "" unless defined($prefix);
$suffix = "" unless defined($suffix);
srand(unpack('N',md5($addr)));
my @encode = (
sub { '' . ord(shift) . ';' },
sub { '' . sprintf( "%X", ord(shift) ) . ';' },
sub { shift },
);
$addr = "mailto:" . $addr;
$addr =~ s{(.)}{
my $char = $1;
if ( $char eq '@' ) {
# this *must* be encoded. I insist.
$char = $encode[int rand 1]->($char);
} elsif ( $char ne ':' ) {
# leave ':' alone (to spot mailto: later)
my $r = rand;
# roughly 10% raw, 45% hex, 45% dec
$char = (
$r > .9 ? $encode[2]->($char) :
$r < .45 ? $encode[1]->($char) :
$encode[0]->($char)
);
}
$char;
}gex;
# strip the mailto: from the visible part
(my $bareaddr = $addr) =~ s/^.+?://;
$addr = _MakeATag("$addr", $prefix.$bareaddr.$suffix);
return $addr;
}
sub _UnescapeSpecialChars {
#
# Swap back in all the special characters we've hidden.
#
my $text = shift;
while( my($char, $hash) = each(%g_escape_table) ) {
$text =~ s/$hash/$char/g;
}
return $text;
}
sub _TokenizeHTML {
#
# Parameter: String containing HTML markup.
# Returns: Reference to an array of the tokens comprising the input
# string. Each token is either a tag (possibly with nested,
# tags contained therein, such as , or a
# run of text between tags. Each element of the array is a
# two-element array; the first is either 'tag' or 'text';
# the second is the actual value.
#
#
# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#
#
my $str = shift;
my $pos = 0;
my $len = length $str;
my @tokens;
my $depth = 6;
my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
my $match = qr/(?s: ) | # comment
(?s: <\? .*? \?> ) | # processing instruction
$nested_tags/iox; # nested tags
while ($str =~ m/($match)/g) {
my $whole_tag = $1;
my $sec_start = pos $str;
my $tag_start = $sec_start - length $whole_tag;
if ($pos < $tag_start) {
push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
}
push @tokens, ['tag', $whole_tag];
$pos = pos $str;
}
push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
\@tokens;
}
sub _Outdent {
#
# Remove one level of line-leading indent_width of spaces
#
my $text = shift;
$text =~ s/^ {1,$opt{indent_width}}//gm;
return $text;
}
# _DeTab
#
# $1 => input text
# $2 => optional tab width (default is $opt{tab_width})
# $3 => leading spaces to strip off each line first (default is 0 aka none)
# <= result with tabs expanded
sub _DeTab {
my $text = shift;
my $ts = shift || $opt{tab_width};
my $leadsp = shift || 0;
my $spr = qr/^ {1,$leadsp}/ if $leadsp;
pos($text) = 0;
my $end = length($text);
my $ans = "";
while (pos($text) < $end) {
my $line;
if ($text =~ /\G(.*?\n)/gcs) {
$line = $1;
} else {
$line = substr($text, pos($text));
pos($text) = $end;
}
$line =~ s/$spr// if $leadsp;
# From the Perl camel book section "Fluent Perl" but modified a bit
$line =~ s/(.*?)(\t+)/$1 . ' ' x (length($2) * $ts - length($1) % $ts)/ges;
$ans .= $line;
}
return $ans;
}
sub _PrefixURL {
#
# Add URL prefix if needed
#
my $url = shift;
$url =~ s/^\s+//;
$url =~ s/\s+$//;
$url = "#" unless $url ne "";
return $url unless
ref($opt{abs_prefix}) eq 'CODE' ||
ref($opt{url_prefix}) eq 'CODE' ||
ref($opt{img_prefix}) eq 'CODE' ;
return $url if $url =~ m"^\002\003" || $url =~ m"^#" || $url =~ m,^//,;
$url = &{$opt{abs_prefix}}($url) if $url =~ m,^/, && ref($opt{abs_prefix}) eq 'CODE';
return $url if $url =~ /^[A-Za-z][A-Za-z0-9+.-]*:/ || $url =~ m,^//, ||
($opt{keepabs} && $url =~ m,^/,);
my $cr = $opt{url_prefix};
$cr = $opt{img_prefix}
if ref($opt{img_prefix}) eq 'CODE' && $url =~ m"^[^#?]*\.(?:png|gif|jpe?g|svgz?)(?:[#?]|$)"i;
return $url unless ref($cr) eq 'CODE';
return "\2\3".&$cr(substr($url, 0, 1) eq '/' ? substr($url, 1) : $url);
}
BEGIN {
$g_style_sheet = <<'STYLESHEET';
STYLESHEET
$g_style_sheet =~ s/^\s+//g;
$g_style_sheet =~ s/\s+$//g;
$g_style_sheet .= "\n";
}
1;
__DATA__
=head1 NAME
Markdown.pl - convert Markdown format text files to HTML
=head1 SYNOPSIS
B [B<--help>] [B<--html4tags>] [B<--htmlroot>=I]
[B<--imageroot>=I] [B<--version>] [B<--shortversion>]
[B<--tabwidth>=I] [B<--stylesheet>] [B<--stub>] [--]
[I...]
Options:
-h show short usage help
--help show long detailed help
--html4tags use
instead of
--deprecated allow and