Browse Source

Bring pipe escape algorithm in tables closer to GFM

- the table rule no longer deals with backticks at all
- `|` is considered escaped if and only if there is a `\` character immediately before it
- the number of cells in the first row (thead) must now match the second row (aligns) exactly
- no tbody is emitted if it would be empty

close https://github.com/markdown-it/markdown-it/issues/689
close https://github.com/markdown-it/markdown-it/pull/697
pull/713/head
Alex Kocharin 4 years ago
parent
commit
3021a5207b
  1. 98
      lib/rules_block/table.js
  2. 335
      test/fixtures/markdown-it/tables.txt

98
lib/rules_block/table.js

@ -1,4 +1,4 @@
// GFM table, non-standard
// GFM table, https://github.github.com/gfm/#tables-extension-
'use strict';
@ -17,56 +17,42 @@ function escapedSplit(str) {
pos = 0,
max = str.length,
ch,
escapes = 0,
isEscaped = false,
lastPos = 0,
backTicked = false,
lastBackTick = 0;
current = '';
ch = str.charCodeAt(pos);
while (pos < max) {
if (ch === 0x60/* ` */) {
if (backTicked) {
// make \` close code sequence, but not open it;
// the reason is: `\` is correct code block
backTicked = false;
lastBackTick = pos;
} else if (escapes % 2 === 0) {
backTicked = true;
lastBackTick = pos;
if (ch === 0x7c/* | */) {
if (!isEscaped) {
// pipe separating cells, '|'
result.push(current + str.substring(lastPos, pos));
current = '';
lastPos = pos + 1;
} else {
// escaped pipe, '\|'
current += str.substring(lastPos, pos - 1);
lastPos = pos;
}
} else if (ch === 0x7c/* | */ && (escapes % 2 === 0) && !backTicked) {
result.push(str.substring(lastPos, pos));
lastPos = pos + 1;
}
if (ch === 0x5c/* \ */) {
escapes++;
} else {
escapes = 0;
}
isEscaped = (ch === 0x5c/* \ */);
pos++;
// If there was an un-closed backtick, go back to just after
// the last backtick, but as if it was a normal character
if (pos === max && backTicked) {
backTicked = false;
pos = lastBackTick + 1;
}
ch = str.charCodeAt(pos);
}
result.push(str.substring(lastPos));
result.push(current + str.substring(lastPos));
return result;
}
module.exports = function table(state, startLine, endLine, silent) {
var ch, lineText, pos, i, nextLine, columns, columnCount, token,
aligns, t, tableLines, tbodyLines;
var ch, lineText, pos, i, l, nextLine, columns, columnCount, token,
aligns, t, tableLines, tbodyLines, oldParentType, terminate,
terminatorRules;
// should have at least two lines
if (startLine + 2 > endLine) { return false; }
@ -125,15 +111,24 @@ module.exports = function table(state, startLine, endLine, silent) {
lineText = getLine(state, startLine).trim();
if (lineText.indexOf('|') === -1) { return false; }
if (state.sCount[startLine] - state.blkIndent >= 4) { return false; }
columns = escapedSplit(lineText.replace(/^\||\|$/g, ''));
columns = escapedSplit(lineText);
if (columns.length && columns[0] === '') columns.shift();
if (columns.length && columns[columns.length - 1] === '') columns.pop();
// header row will define an amount of columns in the entire table,
// and align row shouldn't be smaller than that (the rest of the rows can)
// and align row should be exactly the same (the rest of the rows can differ)
columnCount = columns.length;
if (columnCount > aligns.length) { return false; }
if (columnCount !== aligns.length) { return false; }
if (silent) { return true; }
oldParentType = state.parentType;
state.parentType = 'table';
// use 'blockquote' lists for termination because it's
// the most similar to tables
terminatorRules = state.md.block.ruler.getRules('blockquote');
token = state.push('table_open', 'table', 1);
token.map = tableLines = [ startLine, 0 ];
@ -161,16 +156,29 @@ module.exports = function table(state, startLine, endLine, silent) {
token = state.push('tr_close', 'tr', -1);
token = state.push('thead_close', 'thead', -1);
token = state.push('tbody_open', 'tbody', 1);
token.map = tbodyLines = [ startLine + 2, 0 ];
for (nextLine = startLine + 2; nextLine < endLine; nextLine++) {
if (state.sCount[nextLine] < state.blkIndent) { break; }
terminate = false;
for (i = 0, l = terminatorRules.length; i < l; i++) {
if (terminatorRules[i](state, nextLine, endLine, true)) {
terminate = true;
break;
}
}
if (terminate) { break; }
lineText = getLine(state, nextLine).trim();
if (lineText.indexOf('|') === -1) { break; }
if (!lineText) { break; }
if (state.sCount[nextLine] - state.blkIndent >= 4) { break; }
columns = escapedSplit(lineText.replace(/^\||\|$/g, ''));
columns = escapedSplit(lineText);
if (columns.length && columns[0] === '') columns.shift();
if (columns.length && columns[columns.length - 1] === '') columns.pop();
if (nextLine === startLine + 2) {
token = state.push('tbody_open', 'tbody', 1);
token.map = tbodyLines = [ startLine + 2, 0 ];
}
token = state.push('tr_open', 'tr', 1);
for (i = 0; i < columnCount; i++) {
@ -189,10 +197,16 @@ module.exports = function table(state, startLine, endLine, silent) {
}
token = state.push('tr_close', 'tr', -1);
}
token = state.push('tbody_close', 'tbody', -1);
if (tbodyLines) {
token = state.push('tbody_close', 'tbody', -1);
tbodyLines[1] = nextLine;
}
token = state.push('table_close', 'table', -1);
tableLines[1] = nextLine;
tableLines[1] = tbodyLines[1] = nextLine;
state.parentType = oldParentType;
state.line = nextLine;
return true;
};

335
test/fixtures/markdown-it/tables.txt

@ -273,106 +273,6 @@ bar|bar
.
Should be terminated via row without "|" symbol:
.
foo|foo
---|---
paragraph
.
<table>
<thead>
<tr>
<th>foo</th>
<th>foo</th>
</tr>
</thead>
<tbody></tbody>
</table>
<p>paragraph</p>
.
Delimiter escaping:
.
| Heading 1 \\\\| Heading 2
| --------- | ---------
| Cell\|1\|| Cell\|2
\| Cell\\\|3 \\| Cell\|4
.
<table>
<thead>
<tr>
<th>Heading 1 \\</th>
<th>Heading 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cell|1|</td>
<td>Cell|2</td>
</tr>
<tr>
<td>| Cell\|3 \</td>
<td>Cell|4</td>
</tr>
</tbody>
</table>
.
Pipes inside backticks don't split cells:
.
| Heading 1 | Heading 2
| --------- | ---------
| Cell 1 | Cell 2
| `Cell|3` | Cell 4
.
<table>
<thead>
<tr>
<th>Heading 1</th>
<th>Heading 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
<tr>
<td><code>Cell|3</code></td>
<td>Cell 4</td>
</tr>
</tbody>
</table>
.
Unclosed backticks don't count
.
| Heading 1 | Heading 2
| --------- | ---------
| Cell 1 | Cell 2
| `Cell 3| Cell 4
.
<table>
<thead>
<tr>
<th>Heading 1</th>
<th>Heading 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
<tr>
<td>`Cell 3</td>
<td>Cell 4</td>
</tr>
</tbody>
</table>
.
Another complicated backticks case
.
| Heading 1 | Heading 2
@ -452,7 +352,7 @@ x | \`\` | `x`
The number of cells per row might differ across the table (issue #171):
.
| 1 | 2 |
| :-----: | :-----: | :-----: |
| :-----: | :-----: |
| 3 | 4 | 5 | 6 |
.
<table>
@ -581,7 +481,6 @@ Tables should not be indented more than 4 spaces (3rd line):
<th>Col2a</th>
</tr>
</thead>
<tbody></tbody>
</table>
<pre><code>| Col1b | Col2b |
</code></pre>
@ -600,7 +499,6 @@ Allow tables with empty body:
<th>Col2a</th>
</tr>
</thead>
<tbody></tbody>
</table>
.
@ -615,3 +513,234 @@ Col2a | Col2b | Col2c
----- | -----
Col2a | Col2b | Col2c</p>
.
Escaped pipes inside backticks don't split cells:
.
| Heading 1 | Heading 2
| --------- | ---------
| Cell 1 | Cell 2
| `Cell 3\|` | Cell 4
.
<table>
<thead>
<tr>
<th>Heading 1</th>
<th>Heading 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
<tr>
<td><code>Cell 3|</code></td>
<td>Cell 4</td>
</tr>
</tbody>
</table>
.
An escape before an escaped pipe inside backticks doesn't split cells:
.
| Heading 1 | Heading 2
| --------- | ---------
| Cell 1 | Cell 2
| `Cell 3\\|` | Cell 4
.
<table>
<thead>
<tr>
<th>Heading 1</th>
<th>Heading 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Cell 1</td>
<td>Cell 2</td>
</tr>
<tr>
<td><code>Cell 3\|</code></td>
<td>Cell 4</td>
</tr>
</tbody>
</table>
.
GFM 4.10 Tables (extension), Example 198
.
| foo | bar |
| --- | --- |
| baz | bim |
.
<table>
<thead>
<tr>
<th>foo</th>
<th>bar</th>
</tr>
</thead>
<tbody>
<tr>
<td>baz</td>
<td>bim</td>
</tr>
</tbody>
</table>
.
GFM 4.10 Tables (extension), Example 199
.
| abc | defghi |
:-: | -----------:
bar | baz
.
<table>
<thead>
<tr>
<th style="text-align:center">abc</th>
<th style="text-align:right">defghi</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:center">bar</td>
<td style="text-align:right">baz</td>
</tr>
</tbody>
</table>
.
GFM 4.10 Tables (extension), Example 200
.
| f\|oo |
| ------ |
| b `\|` az |
| b **\|** im |
.
<table>
<thead>
<tr>
<th>f|oo</th>
</tr>
</thead>
<tbody>
<tr>
<td>b <code>|</code> az</td>
</tr>
<tr>
<td>b <strong>|</strong> im</td>
</tr>
</tbody>
</table>
.
GFM 4.10 Tables (extension), Example 201
.
| abc | def |
| --- | --- |
| bar | baz |
> bar
.
<table>
<thead>
<tr>
<th>abc</th>
<th>def</th>
</tr>
</thead>
<tbody>
<tr>
<td>bar</td>
<td>baz</td>
</tr>
</tbody>
</table>
<blockquote>
<p>bar</p>
</blockquote>
.
GFM 4.10 Tables (extension), Example 202
.
| abc | def |
| --- | --- |
| bar | baz |
bar
bar
.
<table>
<thead>
<tr>
<th>abc</th>
<th>def</th>
</tr>
</thead>
<tbody>
<tr>
<td>bar</td>
<td>baz</td>
</tr>
<tr>
<td>bar</td>
<td></td>
</tr>
</tbody>
</table>
<p>bar</p>
.
GFM 4.10 Tables (extension), Example 203
.
| abc | def |
| --- |
| bar |
.
<p>| abc | def |
| — |
| bar |</p>
.
GFM 4.10 Tables (extension), Example 204
.
| abc | def |
| --- | --- |
| bar |
| bar | baz | boo |
.
<table>
<thead>
<tr>
<th>abc</th>
<th>def</th>
</tr>
</thead>
<tbody>
<tr>
<td>bar</td>
<td></td>
</tr>
<tr>
<td>bar</td>
<td>baz</td>
</tr>
</tbody>
</table>
.
GFM 4.10 Tables (extension), Example 205
.
| abc | def |
| --- | --- |
.
<table>
<thead>
<tr>
<th>abc</th>
<th>def</th>
</tr>
</thead>
</table>
.

Loading…
Cancel
Save