Browse Source

Markdown.pl: improve ```-delimited code blocks handling

Handle ```-delimited code blocks earlier so that tabs within them
can be correctly expanded to 8-character tab stop positions and
also to avoid the result being incorrectly interpreted any further.

Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
master
Kyle J. McKay 8 years ago
parent
commit
f3b8f48b3a
  1. 62
      Markdown.pl
  2. 39
      basics.text
  3. 3
      syntax.text

62
Markdown.pl

@ -35,7 +35,7 @@ $VERSION = '1.0.3';
# Global default settings:
#
my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
my $g_tab_width = 4;
my $g_tab_width = 4; # Legacy even though it's wrong
#
@ -290,6 +290,9 @@ sub Markdown {
# Make sure $text ends with a couple of newlines:
$text .= "\n\n";
# Handle backticks-delimited code blocks
$text = _HashBTCodeBlocks($text);
# Convert all tabs to spaces.
$text = _Detab($text);
@ -313,6 +316,40 @@ sub Markdown {
}
sub _HashBTCodeBlocks {
#
# Process Markdown backticks (```) delimited code blocks.
#
# Takes the document text as its only argument.  Every block opened by a
# line of three or more backticks (optionally followed by a syntax name)
# and closed by a line with the same number of backticks -- or by the end
# of the document -- is rendered to <pre><code>...</code></pre>, stored in
# %g_html_blocks under the md5_hex digest of the rendered HTML, and
# replaced in the text by that digest surrounded by blank lines.
#
# Returns the modified text.
#
    my $text = shift;

    $text =~ s{
        # The opening fence must begin a line.  A zero-width lookbehind is
        # used instead of consuming the newline: under /g the newline that
        # ends the previous closing fence is already part of that match, so
        # consuming it here would stop a block that directly follows
        # another block from being recognized.
        (?:(?<=\n)|\A)
        ``(`+)[ \t]*(?:([\w.+-]+)[ \t]*)?\n
        (   # capture 3 = the code block -- one or more lines
            (?:
                .*\n+
            )+?
        )
        # ending with a fence of the same length or the end of document
        (?:(?:``\1[ \t]*(?:\n|\Z))|\Z)
    }{
        # $2 contains syntax highlighting to use if defined
        my $codeblock = $3;     # copy before the nested matches reset captures
        $codeblock =~ s/[ \t]+$//mg;            # trim trailing spaces on lines
        $codeblock = _Detab($codeblock, 8);     # physical tab stops are always 8
        $codeblock =~ s/\A\n+//;                # trim leading newlines
        $codeblock =~ s/\s+\z//;                # trim trailing whitespace
        $codeblock = _EncodeCode($codeblock);   # or run highlighter here
        $codeblock = "<pre><code>" . $codeblock . "\n</code></pre>";
        my $key = md5_hex($codeblock);
        $g_html_blocks{$key} = $codeblock;
        "\n\n" . $key . "\n\n";
    }egmx;

    return $text;
}
sub _StripLinkDefinitions {
#
# Strips link definitions from text, stores the URLs and titles in
@ -1003,29 +1040,6 @@ sub _DoCodeBlocks {
$result;
}egmx;
$text =~ s{
(?:\n|\A)
``(`+)[ \t]*(?:[\w.-]+[ \t]*)?\n
( # $1 = the code block -- one or more lines, starting with ```
(?:
.*\n+
)+?
)
(?:(?:``\1[ \t]*(?:\n|\Z))|\Z) # and ending with ``` or end of document
}{
my $codeblock = $2;
my $result; # return value
$codeblock = _EncodeCode($codeblock);
$codeblock = _Detab($codeblock);
$codeblock =~ s/\A\n+//; # trim leading newlines
$codeblock =~ s/\s+\z//; # trim trailing whitespace
$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
$result;
}egmx;
return $text;
}

39
basics.text

@ -293,11 +293,40 @@ Output:
To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`,
and `>` characters will be escaped automatically. Alternatively an
entire block of pre-formatted code may be preceded with a line consisting
of 3 backtick quotes (or more) and followed by a line consisting of the
same number of backtick quotes -- in which case the code itself does not
need to be additionally indented.
and `>` characters will be escaped automatically.
Alternatively an entire block of pre-formatted code may be preceded with a
line consisting of 3 backtick quotes (or more) and followed by a line
consisting of the same number of backtick quotes -- in which case the code
itself does not need to be additionally indented. The first line may
optionally have a syntax specifier (e.g. sh, c, perl, etc.) appended.
Note also that any physical tab characters within a 3-backtick-quotes,
non-indented code block are always expanded correctly to 8-character tab-stop
positions (which differs from the 4-character positions used in the rest of a
markdown file). This is to facilitate simple copy-and-paste to include code
snippets.
Markdown:
```
# This is a simple code block with unspecified syntax
```
Output:
<pre><code># This is a simple code block with unspecified syntax
</code></pre>
Markdown:
``` perl
my $var = "value"; # this should be highlighted as Perl code
```
Output:
<pre><code>my $var = "value"; # this should be highlighted as Perl code
</code></pre>
Markdown:

3
syntax.text

@ -535,6 +535,9 @@ Note that the 3 backtick quotes (or more) must appear at the beginning
of the line. To include a code block within a list (or other indented
element), the indentation technique must be used.
Also note that within a backticks-delimited code block, tab characters
are always expanded with the tab stop locations 8 characters apart.
Within a code block, ampersands (`&`) and angle brackets (`<` and `>`)
are automatically converted into HTML entities. This makes it very
easy to include example HTML source code using Markdown -- just paste

Loading…
Cancel
Save