From 951005b131a5362cd2e8ef29a34db1bb421cbd67 Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Sun, 10 Nov 2019 14:41:11 -0700 Subject: [PATCH] Markdown.pl: defang detab The old tab expansion code exhibits very poor performance when passed the contents of entire files as input. Mitigate this inefficiency by operating on one line at a time. Signed-off-by: Kyle J. McKay --- Markdown.pl | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/Markdown.pl b/Markdown.pl index e7299cb..f9001be 100755 --- a/Markdown.pl +++ b/Markdown.pl @@ -542,7 +542,7 @@ sub Markdown { $text = _HashBTCodeBlocks($text); # Convert all tabs to spaces. - $text = _Detab($text); + $text = _DeTab($text); # Strip any lines consisting only of spaces. # This makes subsequent regexen easier to write, because we can @@ -602,7 +602,7 @@ sub _HashBTCodeBlocks { my $leadsp = length($1); my $codeblock = $4; $codeblock =~ s/[ \t]+$//mg; # trim trailing spaces on lines - $codeblock = _Detab($codeblock, 8, $leadsp); # physical tab stops are always 8 + $codeblock = _DeTab($codeblock, 8, $leadsp); # physical tab stops are always 8 $codeblock =~ s/\A\n+//; # trim leading newlines $codeblock =~ s/\s+\z//; # trim trailing whitespace $codeblock = _EncodeCode($codeblock); # or run highlighter here @@ -2614,17 +2614,46 @@ sub _Outdent { } -sub _Detab { -# -# Expand tabs to spaces using $opt{tab_width} if no second argument +# _DeTab # +# $1 => input text +# $2 => optional tab width (default is $opt{tab_width}) +# $3 => leading spaces to strip off each line first (default is 0 aka none) +# <= result with tabs expanded +sub _DeTab { my $text = shift; my $ts = shift || $opt{tab_width}; my $leadsp = shift || 0; - $text =~ s/^ {1,$leadsp}//mg if $leadsp; # trim leading space(s) - # From the Perl camel book "Fluent Perl" section (slightly modified) - $text =~ s/(.*?)(\t+)/$1 . ' ' x (length($2) * $ts - length($1) % $ts)/ge; - return $text; + my $spr = qr/^ {1,$leadsp}/ if $leadsp; + pos($text) = 0; + my $end = length($text); + my $ans = ""; + while (pos($text) < $end) { + my $line; + if ($text =~ /\G([^\n]*\n)/gc) { + $line = $1; + } else { + $line = substr($text, pos($text)); + pos($text) = $end; + } + $line =~ s/$spr// if $leadsp; + my $eline = length($line); + pos($line) = 0; + my $xline = ""; + while (pos($line) < $eline) { + if ($line =~ /\G([^\t]+)/gc) { + $xline .= $1; + next; + } + if ($line =~ /\G(\t+)/gc) { + $xline .= ' ' x (length($1) * $ts - (length($xline) % $ts)); + next; + } + die "programmer error"; # cannot get here + } + $ans .= $xline; + } + return $ans; }