From 990f74e52eca6cedf794080b2d1d9358a45b6f22 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Fri, 25 Oct 2019 00:17:22 -0700 Subject: [PATCH] Markdown.pl: support [[wiki style links]] with --wiki The new `--wiki` option (with optional argument) specifies how to transform [[wiki style links]] into URLs. There are a veritable plethora of options available to affect the transformation. Absolute URL wiki style links continue to be recognized even without the `--wiki` option. Signed-off-by: Kyle J. McKay --- Markdown.pl | 162 +++++++++++++++++++++++++++++++++++++++++++++++++--- syntax.md | 49 +++++++++++++++- 2 files changed, 203 insertions(+), 8 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index 318f20b..0e8ed06 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -278,6 +278,7 @@ sub _main { 'deprecated', 'htmlroot|r=s', 'imageroot|i=s', + 'wiki|w:s', 'tabwidth|tab-width=s', 'stylesheet|style-sheet', 'no-stylesheet|no-style-sheet', @@ -323,6 +324,18 @@ sub _main { if ($cli_opts{'imageroot'}) { # Use image URL prefix $options{img_prefix} = $cli_opts{'imageroot'}; } + if (exists $cli_opts{'wiki'}) { # Enable wiki links + my $wpat = $cli_opts{'wiki'}; + defined($wpat) or $wpat = ""; + my $wopt = "s"; + if ($wpat =~ /^(.*?)%\{([0-9A-Za-z]*)\}(.*)$/) { + $options{wikipat} = $1 . "%{}" . $3; + $wopt = $2; + } else { + $options{wikipat} = $wpat . "%{}.html"; + } + $options{wikiopt} = { map({$_ => 1} split(//,lc($wopt))) }; + } if ($cli_opts{'stylesheet'}) { # Display the style sheet $options{show_styles} = 1; } @@ -794,20 +807,74 @@ sub _ProcessWikiLink { my ($link_text, $link_loc) = @_; if (defined($link_loc) && ($link_loc =~ m{^#\S*$} || $link_loc =~ m{^(?:http|ftp)s?://\S+$}i)) { - # Just rewrite it to [...](...) form - return "[".$link_text."](".$link_loc.")"; + # Return the new link + return _MakeATag($link_loc, $link_text); } - my $sloc; if (!defined($link_loc) && - ($sloc = _strip($link_text)) =~ m{^(?:http|ftp)s?://\S+$}i) { - # Just rewrite it to [...](...) 
form - return "[".$link_text."](".$sloc.")"; + ($link_loc = _strip($link_text)) =~ m{^(?:http|ftp)s?://\S+$}i) { + # Return the new link + return _MakeATag($link_loc, $link_text); + } + return undef if $link_loc eq "" || $link_text eq ""; + if ($link_loc =~ /^[A-Za-z][A-Za-z0-9+.-]*:/os) { + # Unrecognized scheme + return undef; + } + if ($opt{wikipat}) { + my $o = $opt{wikiopt}; + my $qsfrag = ""; + my $base = $link_loc; + if ($link_loc =~ /^(.*?)([?#].*)$/os) { + ($base, $qsfrag) = ($1, $2); + } + $base = _wxform($base); + my $result = $opt{wikipat}; + $result =~ s/%\{\}/$base/; + if ($qsfrag =~ /^([^#]*)(#.+)$/os) { + my ($q,$f) = ($1,$2); + #$f = _wxform($f) if $f =~ / /; + $qsfrag = $q . $f; + } + $result .= $qsfrag; + { + use bytes; + $result =~ s/%(?![0-9A-Fa-f]{2})/%25/sog; + if ($o->{r}) { + $result =~ + s/([\x00-\x1F <>"{}|\\^`\x7F])/sprintf("%%%02X",ord($1))/soge; + } else { + $result =~ + s/([\x00-\x1F <>"{}|\\^`\x7F-\xFF])/sprintf("%%%02X",ord($1))/soge; + } + $result =~ s/(%(?![0-9A-F]{2})[0-9A-Fa-f]{2})/uc($1)/soge; + } + # Return the new link + return _MakeATag($result, $link_text); } - # We don't handle any other wiki-style links yet + # leave it alone return undef; } +sub _wxform { + my $w = shift; + my $o = $opt{wikiopt}; + $w =~ s{[.][^./]*$}{} if $o->{s}; + $w =~ tr{/}{ } if $o->{f}; + $w =~ s{/+}{/}gos if !$o->{f} && !$o->{v}; + if ($o->{d}) { + $w =~ tr{ }{-}; + $w =~ s/-+/-/gos unless $o->{v}; + } else { + $w =~ tr{ }{_}; + $w =~ s/_+/_/gos unless $o->{v}; + } + $w = uc($w) if $o->{u}; + $w = lc($w) if $o->{l}; + return $w; +} + + # Return a suitably encoded tag string # On input NONE of $url, $text or $title should be xmlencoded # but $url should already be url-encoded if needed, but NOT g_escape_table'd @@ -2496,6 +2563,7 @@ B [B<--help>] [B<--html4tags>] [B<--htmlroot>=I] to prefix -i prefix | --imageroot=prefix append relative img URLs to prefix + -w [wikipat] | --wiki[=wikipat] activate wiki links using wikipat -V | --version show 
version, authors, license and copyright -s | --shortversion show just the version number @@ -2584,6 +2652,86 @@ Any non-absolute URLs have I<prefix> prepended (overriding the B<-r> prefix if any) but only if they end in an image suffix. +=item B<-w> [I<wikipat>], B<--wiki>[=I<wikipat>] + +Activate wiki links. Any link enclosed in double brackets (e.g. "[[link]]") is +considered a wiki link. By default only absolute URL and fragment links are +allowed in the "wiki link style" format. Any other double-bracketed strings +are left unmolested. + +If this option is given, all other wiki links are enabled as well. Any +non-absolute URL or fragment links will be transformed into a link using +I<wikipat> where the default I<wikipat> if none is given is C<%{s}.html>. + +If the given I<wikipat> does not contain a C<%{...}> placeholder sequence +then it will automatically have C<%{s}.html> suffixed to it. + +The C<...> part of the C<%{...}> sequence specifies zero or more case-insensitive +single-letter options with the following effects: + +=over + +=item B<d> + +Convert spaces to dashes (ASCII 0x2D) instead of underscore (ASCII 0x5F). Note +that if this option is given then runs of multiple dashes will be converted to +a single dash I<instead> but runs of multiple underscores will be left untouched. + +=item B<f> + +Flatten the resulting name by replacing forward slashes (ASCII 0x2F) as well. +They will be converted to underscores unless the C<d> option is given (in which +case they will be converted to dashes). This conversion takes place before +applying the runs-of-multiple reduction. + +=item B<l> + +Convert link target (excluding any query string and/or fragment) to lowercase. +Takes precedence over any C<u> option, but specifically excludes C<%>-escapes +which are always UPPERCASE hexadecimal. + +=item B<r> + +Leave raw UTF-8 characters in the result. Normally anything not allowed +directly in a URL ends up URL-encoded. With this option, raw valid UTF-8 +sequences will be left untouched. Use with care. 
+ +=item B<s> + +After (temporarily) removing any query string and/or fragment, strip any final +"dot" suffix so long as it occurs after the last slash (if any slash was present +before applying the C<f> option). The "dot" (ASCII 0x2E) and all following +characters (if any) are removed. + +=item B<u> + +Convert link target (excluding any query string and/or fragment) to UPPERCASE. + +=item B<v> + +Leave runs-of-multiple characters alone (aka "verbatim"). Does not affect +any of the other options except by eliminating the runs-of-multiple reduction +step. Also does I<not> inhibit the initial whitespace trimming. + +=back + +The URL target of the wiki link is created by first trimming whitespace +(starting and ending whitespace is removed and all other runs of consecutive +whitespace are replaced with a single space) from the wiki link target, +removing (temporarily) any query string and/or fragment, if no options are +present, spaces are converted to underscores (C<_>) and runs of multiple +consecutive underscores are replaced with a single underscore (ASCII 0x5F). +Finally, the I<wikipat> string gets its first placeholder (the C<%{...}> +sequence) replaced with this computed value and the original query string +and/or fragment is re-appended (if any were originally present) and +URL-encoding is applied as needed to produce the actual final target URL. + +See above option descriptions for possible available modifications. + +One of the commonly used hosting platforms does something substantially similar +to using C<%{dfrsv}> as the placeholder. + + =item B<-V>, B<--version> Display Markdown's version number and copyright information. diff --git a/syntax.md b/syntax.md index 48ae4bb..ecc91c2 100644 --- a/syntax.md +++ b/syntax.md @@ -752,10 +752,14 @@ Span Elements Links ~~~~~ -Markdown supports two style of links: *inline* and *reference*. +Markdown supports two styles of links: *inline* and *reference* by default. In both styles, the link text is delimited by [square brackets]. 
+Additionally, if enabled, Wiki Style Links are also supported, but +they are delimited by doubled square brackets (e.g. `[[wiki link]]`) +and have different semantics -- see the end of this section for that. + To create an inline link, use a set of regular parentheses immediately after the link text's closing square bracket. Inside the parentheses, put the URL where you want the link to point, along with an *optional* @@ -940,6 +944,49 @@ allowing you to move the markup-related metadata out of the paragraph, you can add links without interrupting the narrative flow of your prose. +#### Wiki Style Links + +To create a wiki style link, simply use double brackets instead of +single brackets like so: + + [[wiki link]] + [[wiki link|alternate_destination]] + +Even when not explicitly enabled, a few, limited, wiki style links +are always recognized: + + [[http://example.com]] + [[link here|http://example.com]] + [[link here|#destination]] + +The "http:" part can also be "https:", "ftp:" and "ftps:". The +three above links generate these "a" tags: + + <a href="http://example.com">http://example.com</a> + <a href="http://example.com">link here</a> + <a href="#destination">link here</a> + +If full wiki style links have been enabled (via the `--wiki` option), +then additional links like these will work too: + + [[another page]] + [[link here|another page]] + [[elsewhere#section]] + [[link here|elsewhere#section]] + +They will all generate "a" tags and are intended to link to another +document. Exactly what link is generated depends on the value +passed to the `--wiki` option. Using the default value, those four +links above would generate these "a" tags: + + <a href="another_page.html">another page</a> + <a href="another_page.html">link here</a> + <a href="elsewhere.html#section">elsewhere#section</a> + <a href="elsewhere.html#section">link here</a> + +See the command line help (`Markdown.pl --help`) for more details +on exactly how the wiki style links are transformed into "a" tags. + ~~~~~~~~ Emphasis