Browse Source

Put escape sequences into separate token

pull/864/head
Alex Kocharin 2 years ago
parent
commit
75037c6514
  1. 17
      CHANGELOG.md
  2. 5
      lib/parser_core.js
  3. 9
      lib/parser_inline.js
  4. 5
      lib/presets/commonmark.js
  5. 5
      lib/presets/zero.js
  6. 45
      lib/rules_core/text_join.js
  7. 67
      lib/rules_inline/escape.js
  8. 2
      lib/rules_inline/fragments_join.js
  9. 13
      test/fixtures/markdown-it/smartquotes.txt
  10. 17
      test/fixtures/markdown-it/typographer.txt
  11. 8
      test/misc.js

17
CHANGELOG.md

@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [13.0.0] - WIP
### Added
- Added a new token type `text_special` to store escaped characters, same as `text` but
unaffected by replacement plugins (smartquotes, typographer, linkifier, etc.).
- Added a new rule `text_join` in `core` ruler. Text replacement plugins may choose to
insert themselves before it.
### Changed
- `text_collapse` rule is renamed to `fragments_join`.
### Fixed
- Smartquotes, typographic replacements and plain text links can now be escaped
with backslash (e.g. `\(c)` or `google\.com` are no longer replaced).
## [12.3.2] - 2022-01-08
### Security
- Fix possible ReDOS in newline rule. Thanks to @MakeNowJust.
@ -592,6 +608,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Renamed presets folder (configs -> presets).
[13.0.0]: https://github.com/markdown-it/markdown-it/compare/12.3.2...13.0.0
[12.3.2]: https://github.com/markdown-it/markdown-it/compare/12.3.1...12.3.2
[12.3.1]: https://github.com/markdown-it/markdown-it/compare/12.3.0...12.3.1
[12.3.0]: https://github.com/markdown-it/markdown-it/compare/12.2.0...12.3.0

5
lib/parser_core.js

@ -16,7 +16,10 @@ var _rules = [
[ 'inline', require('./rules_core/inline') ],
[ 'linkify', require('./rules_core/linkify') ],
[ 'replacements', require('./rules_core/replacements') ],
[ 'smartquotes', require('./rules_core/smartquotes') ]
[ 'smartquotes', require('./rules_core/smartquotes') ],
// `text_join` finds `text_special` tokens (for escape sequences)
// and joins them with the rest of the text
[ 'text_join', require('./rules_core/text_join') ]
];

9
lib/parser_inline.js

@ -26,11 +26,18 @@ var _rules = [
[ 'entity', require('./rules_inline/entity') ]
];
// The `rules2` ruleset was created specifically for emphasis/strikethrough
// post-processing and may be changed in the future.
//
// Don't use this for anything except pairs (plugins working with `balance_pairs`).
//
var _rules2 = [
[ 'balance_pairs', require('./rules_inline/balance_pairs') ],
[ 'strikethrough', require('./rules_inline/strikethrough').postProcess ],
[ 'emphasis', require('./rules_inline/emphasis').postProcess ],
[ 'text_collapse', require('./rules_inline/text_collapse') ]
// Rules for pairs split '**' into separate text tokens, some of which may be
// left unused; the rule below merges unused segments back with the rest of the text.
[ 'fragments_join', require('./rules_inline/fragments_join') ]
];

5
lib/presets/commonmark.js

@ -38,7 +38,8 @@ module.exports = {
rules: [
'normalize',
'block',
'inline'
'inline',
'text_join'
]
},
@ -73,7 +74,7 @@ module.exports = {
rules2: [
'balance_pairs',
'emphasis',
'text_collapse'
'fragments_join'
]
}
}

5
lib/presets/zero.js

@ -39,7 +39,8 @@ module.exports = {
rules: [
'normalize',
'block',
'inline'
'inline',
'text_join'
]
},
@ -55,7 +56,7 @@ module.exports = {
],
rules2: [
'balance_pairs',
'text_collapse'
'fragments_join'
]
}
}

45
lib/rules_core/text_join.js

@ -0,0 +1,45 @@
// Join raw text tokens with the rest of the text
//
// This is a separate core rule so that text replacement plugins get a chance
// to run after ordinary text fragments have been joined, but before escaped
// characters (`text_special` tokens) are merged into the surrounding text.
//
// For example, `\:)` shouldn't be replaced with an emoji.
//
'use strict';
module.exports = function text_join(state) {
var j, l, tokens, curr, max, last,
blockTokens = state.tokens;
for (j = 0, l = blockTokens.length; j < l; j++) {
if (blockTokens[j].type !== 'inline') continue;
tokens = blockTokens[j].children;
max = tokens.length;
for (curr = 0; curr < max; curr++) {
if (tokens[curr].type === 'text_special') {
tokens[curr].type = 'text';
}
}
for (curr = last = 0; curr < max; curr++) {
if (tokens[curr].type === 'text' &&
curr + 1 < max &&
tokens[curr + 1].type === 'text') {
// collapse two adjacent text nodes
tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content;
} else {
if (curr !== last) { tokens[last] = tokens[curr]; }
last++;
}
}
if (curr !== last) {
tokens.length = last;
}
}
};

67
lib/rules_inline/escape.js

@ -13,40 +13,59 @@ for (var i = 0; i < 256; i++) { ESCAPED.push(0); }
module.exports = function escape(state, silent) {
var ch, pos = state.pos, max = state.posMax;
if (state.src.charCodeAt(pos) !== 0x5C/* \ */) { return false; }
var ch1, ch2, origStr, escapedStr, token, pos = state.pos, max = state.posMax;
if (state.src.charCodeAt(pos) !== 0x5C/* \ */) return false;
pos++;
if (pos < max) {
ch = state.src.charCodeAt(pos);
// '\' at the end of the inline block
if (pos >= max) return false;
ch1 = state.src.charCodeAt(pos);
if (ch < 256 && ESCAPED[ch] !== 0) {
if (!silent) { state.pending += state.src[pos]; }
state.pos += 2;
return true;
if (ch1 === 0x0A) {
if (!silent) {
state.push('hardbreak', 'br', 0);
}
if (ch === 0x0A) {
if (!silent) {
state.push('hardbreak', 'br', 0);
}
pos++;
// skip leading whitespaces from next line
while (pos < max) {
ch1 = state.src.charCodeAt(pos);
if (!isSpace(ch1)) break;
pos++;
}
state.pos = pos;
return true;
}
escapedStr = state.src[pos];
if (ch1 >= 0xD800 && ch1 <= 0xDBFF && pos + 1 < max) {
ch2 = state.src.charCodeAt(pos + 1);
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
escapedStr += state.src[pos + 1];
pos++;
// skip leading whitespaces from next line
while (pos < max) {
ch = state.src.charCodeAt(pos);
if (!isSpace(ch)) { break; }
pos++;
}
state.pos = pos;
return true;
}
}
if (!silent) { state.pending += '\\'; }
state.pos++;
origStr = '\\' + escapedStr;
if (!silent) {
token = state.push('text_special', '', 0);
if (ch1 < 256 && ESCAPED[ch1] !== 0) {
token.content = escapedStr;
} else {
token.content = origStr;
}
token.markup = origStr;
token.info = 'escape';
}
state.pos = pos + 1;
return true;
};

2
lib/rules_inline/text_collapse.js → lib/rules_inline/fragments_join.js

@ -9,7 +9,7 @@
'use strict';
module.exports = function text_collapse(state) {
module.exports = function fragments_join(state) {
var curr, last,
level = 0,
tokens = state.tokens,

13
test/fixtures/markdown-it/smartquotes.txt

@ -164,3 +164,16 @@ Should parse quotes adjacent to inline html, #677:
<p>“test <br>”</p>
<p>“<br> test”</p>
.
Should be escapable:
.
"foo"
\"foo"
"foo\"
.
<p>“foo”</p>
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
.

17
test/fixtures/markdown-it/typographer.txt

@ -60,6 +60,13 @@ dupes
<p>!!! ??? ,</p>
.
copyright should be escapable
.
\(c)
.
<p>(c)</p>
.
dashes
.
@ -80,6 +87,16 @@ markdownit--awesome
<p>markdownit–awesome</p>
.
dashes should be escapable
.
foo \-- bar
foo -\- bar
.
<p>foo -- bar</p>
<p>foo -- bar</p>
.
regression tests for #624
.
1---2---3

8
test/misc.js

@ -254,6 +254,14 @@ describe('Misc', function () {
md.render('# test\n\n - hello\n - world\n')
);
});
it('Should escape surrogate pairs (coverage)', function () {
var md = markdownit();
assert.strictEqual(md.render('\\\uD835\uDC9C'), '<p>\\\uD835\uDC9C</p>\n');
assert.strictEqual(md.render('\\\uD835x'), '<p>\\\uD835x</p>\n');
assert.strictEqual(md.render('\\\uD835'), '<p>\\\uD835</p>\n');
});
});

Loading…
Cancel
Save