Browse Source

Markdown: enhance link handling

Allow links of the form [...](#...) to find themselves on the
page the same way links of the form [...] can.

Be flexible by accepting either '-' or '_' in place of spaces
in the heading name, since fragment names may not contain spaces.

Refactor the code that manufactures "img" and "a" tags to both
simplify the code and make sure that all href, src, alt and title
attributes are fully and properly "escaped".

In addition, if the "title" for an image ends with something that
looks like "(512x342)", "(?x342)" or "(512x?)" then strip that out
of the title and set the appropriate width and height attributes
on the manufactured "img" tag.  For example something like this:

    ![Nice pic](pic.jpg "Nice (500x300)")

or this:

    ![Nice pic][1]
    [1]: <pic.jpg> "Nice (500x300)"

now produces this:

    <img src="pic.jpg" alt="Nice pic" width="500" height="300" title="Nice" />

Update the syntax doc to mention these additions.

Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
master
Kyle J. McKay 5 years ago
parent
commit
c86fea4089
  1. 180
      Markdown.pl
  2. 34
      syntax.md

180
Markdown.pl

@ -73,6 +73,7 @@ my %g_perm_block_ids;
my %g_urls; my %g_urls;
my %g_titles; my %g_titles;
my %g_anchors; my %g_anchors;
my %g_anchors_id;
my %g_block_ids; my %g_block_ids;
my %g_html_blocks; my %g_html_blocks;
my %g_code_blocks; my %g_code_blocks;
@ -794,6 +795,22 @@ sub _ProcessWikiLink {
} }
# Build and return an <a ...>...</a> tag string with encoded attributes.
#
# Arguments (an undefined argument is treated as the empty string):
#   $url   - link target, emitted as the href value via _EncodeAttText.
#            On input it must NOT be xmlencoded and NOT g_escape_table'd,
#            but it should already be url-encoded if needed.
#   $text  - link text, placed between the tags exactly as given; it must
#            NOT be xmlencoded on input.
#   $title - title text, must NOT be xmlencoded on input.  A title attribute
#            is emitted only when the title is non-empty after _strip.
sub _MakeATag {
    my ($url, $text, $title) = @_;
    $url   = "" unless defined($url);
    $text  = "" unless defined($text);
    $title = "" unless defined($title);

    # href is always present, even when the url is empty.
    my @attrs = ("href=\"" . _EncodeAttText($url) . "\"");

    # Strip first so a whitespace-only title does not produce an attribute.
    $title = _strip($title);
    if ($title ne "") {
        push(@attrs, "title=\"" . _EncodeAttText($title) . "\"");
    }

    return "<a " . join(" ", @attrs) . ">" . $text . "</a>";
}
sub _DoAnchors { sub _DoAnchors {
# #
# Turn Markdown link shortcuts into XHTML <a> tags. # Turn Markdown link shortcuts into XHTML <a> tags.
@ -845,25 +862,16 @@ sub _DoAnchors {
my $result; my $result;
my $whole_match = $1; my $whole_match = $1;
my $link_text = $2; my $link_text = $2;
my $link_id = _strip(lc $3); my $link_id = $3;
if ($link_id eq "") { $link_id ne "" or $link_id = $link_text; # for shortcut links like [this][].
$link_id = _strip(lc $link_text); # for shortcut links like [this][]. $link_id = _strip(lc $link_id);
}
if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) { if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
my $url = $g_urls{$link_id}; my $url = $g_urls{$link_id};
$url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id}; $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
# We've got to encode these to avoid conflicting
# with italics, bold and strike through.
$url =~ s!([*_~])!$g_escape_table{$1}!g;
$result = "<a href=\"$url\"";
if ( defined $g_titles{$link_id} ) {
my $title = _EncodeAttText($g_titles{$link_id});
$result .= " title=\"$title\"";
}
$link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/; $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
$result .= ">$link_text</a>"; $result = _MakeATag($url, $link_text, $g_titles{$link_id});
} }
else { else {
$result = $whole_match; $result = $whole_match;
@ -891,26 +899,27 @@ sub _DoAnchors {
\) \)
) )
}{ }{
my $result; #my $result;
my $whole_match = $1; my $whole_match = $1;
my $link_text = $2; my $link_text = $2;
my $url = $3; my $url = $3;
my $title = _EncodeAttText($6); my $title = $6;
$url = _PrefixURL($url);
# We've got to encode these to avoid conflicting
# with italics, bold and strike through.
$url =~ s!([*_~])!$g_escape_table{$1}!g;
$result = "<a href=\"$url\"";
if (defined $title) { if ($url =~ /^#\S/) {
$result .= " title=\"$title\""; my $idbase = _strip(lc(substr($url, 1)));
my $id = _MakeAnchorId($idbase);
if (defined($g_anchors_id{$id})) {
$url = $g_anchors_id{$id};
} else {
$idbase =~ s/-/_/gs;
$id = _MakeAnchorId($idbase);
if (defined($g_anchors_id{$id})) {
$url = $g_anchors_id{$id};
}
}
} }
$link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/; $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
$result .= ">$link_text</a>"; _MakeATag(_PrefixURL($url), $link_text, $title);
$result;
}xsge; }xsge;
# #
@ -931,16 +940,8 @@ sub _DoAnchors {
if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) { if (defined($g_urls{$link_id}) || defined($g_anchors{$link_id})) {
my $url = $g_urls{$link_id}; my $url = $g_urls{$link_id};
$url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id}; $url = defined($url) ? _PrefixURL($url) : $g_anchors{$link_id};
# We've got to encode these to avoid conflicting
# with italics, bold and strike through.
$url =~ s!([*_~])!$g_escape_table{$1}!g;
$result = "<a href=\"$url\"";
if ( defined $g_titles{$link_id} ) {
my $title = _EncodeAttText($g_titles{$link_id});
$result .= " title=\"$title\"";
}
$link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/; $link_text = '[' . $link_text . ']' if $link_text =~ /^\d{1,3}$/;
$result .= ">$link_text</a>"; $result = _MakeATag($url, $link_text, $g_titles{$link_id});
} }
else { else {
$result = $whole_match; $result = $whole_match;
@ -952,6 +953,35 @@ sub _DoAnchors {
} }
# Build and return an <img ...> tag string with encoded attributes.
#
# Arguments (an undefined argument is treated as the empty string):
#   $url   - image source, emitted as the src value via _EncodeAttText.
#            On input it must NOT be xmlencoded and NOT g_escape_table'd,
#            but it should already be url-encoded if needed.
#   $alt   - alternate text, must NOT be xmlencoded on input; the alt
#            attribute is emitted only when non-empty after _strip.
#   $title - title text, must NOT be xmlencoded on input.  A trailing
#            "(WxH)", "(?xH)" or "(Wx?)" (W and H positive integers without
#            a leading zero, "x" in either case) is removed from the title
#            and turned into width and/or height attributes.
#
# Returns "" when $url is empty; otherwise the tag, whose attributes appear
# in the order src, alt, width, height, title, followed by
# $opt{empty_element_suffix}.
sub _MakeIMGTag {
    my ($url, $alt, $title) = @_;
    $url   = "" unless defined($url);
    $alt   = "" unless defined($alt);
    $title = "" unless defined($title);
    return "" if $url eq "";

    my $tag = "<img src=\"" . _EncodeAttText($url) . "\"";

    # A value of 0 means "dimension not specified".
    my %dim = (width => 0, height => 0);
    ($alt, $title) = (_strip($alt), _strip($title));

    # Peel an optional dimension suffix off the end of the title.
    my @m;
    if (@m = $title =~ /^(.*)\(([1-9][0-9]*)[xX]([1-9][0-9]*)\)$/os) {
        ($title, @dim{qw(width height)}) = (_strip($m[0]), @m[1, 2]);
    }
    elsif (@m = $title =~ /^(.*)\(\?[xX]([1-9][0-9]*)\)$/os) {
        ($title, $dim{height}) = (_strip($m[0]), $m[1]);
    }
    elsif (@m = $title =~ /^(.*)\(([1-9][0-9]*)[xX]\?\)$/os) {
        ($title, $dim{width}) = (_strip($m[0]), $m[1]);
    }

    if ($alt ne "") {
        $tag .= " alt=\"" . _EncodeAttText($alt) . "\"";
    }
    for my $name (qw(width height)) {
        $tag .= " $name=\"$dim{$name}\"" if $dim{$name} != 0;
    }
    if ($title ne "") {
        $tag .= " title=\"" . _EncodeAttText($title) . "\"";
    }

    return $tag . $opt{empty_element_suffix};
}
sub _DoImages { sub _DoImages {
# #
# Turn Markdown image shortcuts into <img> tags. # Turn Markdown image shortcuts into <img> tags.
@ -978,25 +1008,15 @@ sub _DoImages {
}{ }{
my $result; my $result;
my $whole_match = $1; my $whole_match = $1;
my $alt_text = _strip($2); my $alt_text = $2;
my $link_id = _strip(lc $3); my $link_id = $3;
if ($link_id eq "") { $link_id ne "" or $link_id = $alt_text; # for shortcut links like ![this][].
$link_id = lc $alt_text; # for shortcut links like ![this][]. $link_id = _strip(lc $link_id);
}
$alt_text = _EncodeAttText($alt_text);
if (defined $g_urls{$link_id}) { if (defined $g_urls{$link_id}) {
my $url = _PrefixURL($g_urls{$link_id}); $result = _MakeIMGTag(
# We've got to encode these to avoid conflicting _PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
# with italics, bold and strike through.
$url =~ s!([*_~])!$g_escape_table{$1}!g;
$result = "<img src=\"$url\" alt=\"$alt_text\"";
if (defined $g_titles{$link_id}) {
my $title = _EncodeAttText($g_titles{$link_id});
$result .= " title=\"$title\"";
}
$result .= $opt{empty_element_suffix};
} }
else { else {
# If there's no such link ID, leave intact: # If there's no such link ID, leave intact:
@ -1028,26 +1048,15 @@ sub _DoImages {
\) \)
) )
}{ }{
my $result; #my $whole_match = $1;
my $whole_match = $1; my $alt_text = $2;
my $alt_text = _EncodeAttText($2);
my $url = $3; my $url = $3;
my $title = ''; my $title = '';
if (defined($6)) { if (defined($6)) {
$title = _EncodeAttText($6); $title = $6;
} }
$url = _PrefixURL($url); _MakeIMGTag(_PrefixURL($url), $alt_text, $title);
# We've got to encode these to avoid conflicting
# with italics, bold and strike through.
$url =~ s!([*_~])!$g_escape_table{$1}!g;
$result = "<img src=\"$url\" alt=\"$alt_text\"";
if (defined $title) {
$result .= " title=\"$title\"";
}
$result .= $opt{empty_element_suffix};
$result;
}xsge; }xsge;
# #
@ -1062,21 +1071,12 @@ sub _DoImages {
}{ }{
my $result; my $result;
my $whole_match = $1; my $whole_match = $1;
my $alt_text = _strip($2); my $alt_text = $2;
my $link_id = lc $alt_text; my $link_id = lc(_strip($alt_text));
$alt_text = _EncodeAttText($alt_text);
if (defined $g_urls{$link_id}) { if (defined $g_urls{$link_id}) {
my $url = _PrefixURL($g_urls{$link_id}); $result = _MakeIMGTag(
# We've got to encode these to avoid conflicting _PrefixURL($g_urls{$link_id}), $alt_text, $g_titles{$link_id});
# with italics, bold and strike through.
$url =~ s!([*_~])!$g_escape_table{$1}!g;
$result = "<img src=\"$url\" alt=\"$alt_text\"";
if (defined $g_titles{$link_id}) {
my $title = _EncodeAttText($g_titles{$link_id});
$result .= " title=\"$title\"";
}
$result .= $opt{empty_element_suffix};
} }
else { else {
# If there's no such link ID, leave intact: # If there's no such link ID, leave intact:
@ -1092,9 +1092,9 @@ sub _DoImages {
sub _EncodeAttText { sub _EncodeAttText {
my $text = shift; my $text = shift;
defined($text) or return undef; defined($text) or return undef;
$text = _EncodeAmps(_strip($text)); $text = _HTMLEncode(_strip($text));
$text =~ s/\042/&quot;/g; # We've got to encode these to avoid conflicting
$text =~ s/</&lt;/g; # with italics, bold and strike through.
$text =~ s!([*_~:])!$g_escape_table{$1}!g; $text =~ s!([*_~:])!$g_escape_table{$1}!g;
return $text; return $text;
} }
@ -1103,10 +1103,13 @@ sub _EncodeAttText {
sub _MakeAnchorId { sub _MakeAnchorId {
use bytes; use bytes;
my $link = shift; my $link = shift;
$link = lc($link);
$link =~ tr/-a-z0-9_/_/cs; $link =~ tr/-a-z0-9_/_/cs;
return '' unless $link ne ''; return '' unless $link ne '';
$link = md5_hex($link) if length($link) > 64; $link = "_".$link."_";
"_".$link."_"; $link =~ s/__+/_/gs;
$link = "_".md5_hex($link)."_" if length($link) > 66;
return $link;
} }
@ -1116,6 +1119,13 @@ sub _GetNewAnchorId {
my $id = _MakeAnchorId($link); my $id = _MakeAnchorId($link);
return '' unless $id; return '' unless $id;
$g_anchors{$link} = '#'.$id; $g_anchors{$link} = '#'.$id;
$g_anchors_id{$id} = $g_anchors{$link};
if ($id =~ /-/) {
my $id2 = $id;
$id2 =~ s/-/_/gs;
$id2 =~ s/__+/_/gs;
defined($g_anchors_id{$id2}) or $g_anchors_id{$id2} = $g_anchors{$link};
}
$id; $id;
} }

34
syntax.md

@ -876,6 +876,24 @@ already a previous definition with the same id. You can use this
to place a table-of-contents at the top of the document that links to place a table-of-contents at the top of the document that links
to subsections later in the document. Just like this document. to subsections later in the document. Just like this document.
For example, all six of these links point to subsections later in
the same document:
* Self Same
* [Introduction]
* [Part Two]
* [Part Three]
* Different
* [Introduction](#Part-Two)
* [Part Two](#Part_Three)
* [Part Three](#introduction)
## Introduction
## Part Two
## Part Three
Here's an example of reference links in action: Here's an example of reference links in action:
I get 10 times more traffic from [Google] [1] than from I get 10 times more traffic from [Google] [1] than from
@ -1064,9 +1082,19 @@ are defined using syntax identical to link references:
[id]: url/to/image "Optional title attribute" [id]: url/to/image "Optional title attribute"
As of this writing, Markdown has no syntax for specifying the To specify one or both dimensions of an image, include the dimensions
dimensions of an image; if this is important to you, you can simply in parentheses at the end of the title like so:
use regular HTML `<img>` tags.
[id]: url/to/image "Optional title attribute (512x342)"
To resize in just one dimension, specify the other as a "?" like so:
[id]: url/to/image "Optional title attribute (?x342)"
[id]: url/to/image "Optional title attribute (512x?)"
The first dimension sets the "width" attribute and the second
dimension sets the "height" attribute. The dimensions are then
removed from the "title" attribute.
- - - - - - - - - -

Loading…
Cancel
Save