Browse Source

Improve emphasis algorithm

This fixes quadratic complexity in `**<...>**a**<...>**`
pathological case.
pull/829/head
Alex Kocharin 3 years ago
parent
commit
24abaa51a6
  1. 9
      CHANGELOG.md
  2. 36
      lib/rules_inline/balance_pairs.js
  3. 15
      lib/rules_inline/emphasis.js
  4. 1
      lib/rules_inline/strikethrough.js
  5. 4
      test/pathological.js
  6. 2
      test/pathological.json

9
CHANGELOG.md

@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [12.3.0] - WIP
### Changed
- `StateInline.delimiters[].jump` is removed.
### Fixed
- Fixed quadratic complexity in pathological `***<10k stars>***a***<10k stars>***` case.
## [12.2.0] - 2021-08-02
### Added
- Ordered lists: add order value to token info.
@ -575,6 +583,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Renamed presets folder (configs -> presets).
[12.3.0]: https://github.com/markdown-it/markdown-it/compare/12.2.0...12.3.0
[12.2.0]: https://github.com/markdown-it/markdown-it/compare/12.1.0...12.2.0
[12.1.0]: https://github.com/markdown-it/markdown-it/compare/12.0.6...12.1.0
[12.0.6]: https://github.com/markdown-it/markdown-it/compare/12.0.5...12.0.6

36
lib/rules_inline/balance_pairs.js

@ -9,9 +9,28 @@ function processDelimiters(state, delimiters) {
openersBottom = {},
max = delimiters.length;
if (!max) return;
// headerIdx is the index of the first delimiter of the current delimiter run (the run that contains closer)
var headerIdx = 0;
var lastTokenIdx = -2; // needs any value lower than -1
var jumps = [];
for (closerIdx = 0; closerIdx < max; closerIdx++) {
closer = delimiters[closerIdx];
jumps.push(0);
// markers belong to same delimiter run if:
// - they have adjacent tokens
// - AND markers are the same
//
if (delimiters[headerIdx].marker !== closer.marker || lastTokenIdx !== closer.token - 1) {
headerIdx = closerIdx;
}
lastTokenIdx = closer.token;
// Length is only used for emphasis-specific "rule of 3",
// if it's not defined (in strikethrough or 3rd party plugins),
// we can default it to 0 to disable those checks.
@ -30,14 +49,11 @@ function processDelimiters(state, delimiters) {
minOpenerIdx = openersBottom[closer.marker][(closer.open ? 3 : 0) + (closer.length % 3)];
openerIdx = closerIdx - closer.jump - 1;
// avoid crash if `closer.jump` is pointing outside of the array, see #742
if (openerIdx < -1) openerIdx = -1;
openerIdx = headerIdx - jumps[headerIdx] - 1;
newMinOpenerIdx = openerIdx;
for (; openerIdx > minOpenerIdx; openerIdx -= opener.jump + 1) {
for (; openerIdx > minOpenerIdx; openerIdx -= jumps[openerIdx] + 1) {
opener = delimiters[openerIdx];
if (opener.marker !== closer.marker) continue;
@ -67,15 +83,19 @@ function processDelimiters(state, delimiters) {
// sure algorithm has linear complexity (see *_*_*_*_*_... case).
//
lastJump = openerIdx > 0 && !delimiters[openerIdx - 1].open ?
delimiters[openerIdx - 1].jump + 1 :
jumps[openerIdx - 1] + 1 :
0;
closer.jump = closerIdx - openerIdx + lastJump;
jumps[closerIdx] = closerIdx - openerIdx + lastJump;
jumps[openerIdx] = lastJump;
closer.open = false;
opener.end = closerIdx;
opener.jump = lastJump;
opener.close = false;
newMinOpenerIdx = -1;
// treat next token as start of run,
// it optimizes skips in **<...>**a**<...>** pathological case
lastTokenIdx = -2;
break;
}
}

15
lib/rules_inline/emphasis.js

@ -29,15 +29,6 @@ module.exports.tokenize = function emphasis(state, silent) {
//
length: scanned.length,
// An amount of characters before this one that's equivalent to
// current one. In plain English: if this delimiter does not open
// an emphasis, neither do previous `jump` characters.
//
// Used to skip sequences like "*****" in one step, for 1st asterisk
// value will be 0, for 2nd it's 1 and so on.
//
jump: i,
// A position of the token this delimiter corresponds to.
//
token: state.tokens.length - 1,
@ -91,9 +82,11 @@ function postProcess(state, delimiters) {
//
isStrong = i > 0 &&
delimiters[i - 1].end === startDelim.end + 1 &&
// check that first two markers match and are adjacent
delimiters[i - 1].marker === startDelim.marker &&
delimiters[i - 1].token === startDelim.token - 1 &&
delimiters[startDelim.end + 1].token === endDelim.token + 1 &&
delimiters[i - 1].marker === startDelim.marker;
// check that last two markers are adjacent (we can safely assume they match)
delimiters[startDelim.end + 1].token === endDelim.token + 1;
ch = String.fromCharCode(startDelim.marker);

1
lib/rules_inline/strikethrough.js

@ -33,7 +33,6 @@ module.exports.tokenize = function strikethrough(state, silent) {
state.delimiters.push({
marker: marker,
length: 0, // disable "rule of 3" length checks meant for emphasis
jump: i / 2, // for `~~` 1 marker = 2 characters
token: state.tokens.length - 1,
end: -1,
open: scanned.can_open,

4
test/pathological.js

@ -57,6 +57,10 @@ describe('Pathological sequences speed', () => {
);
});
it('nested inlines', async () => {
await test_pattern('*'.repeat(60000) + 'a' + '*'.repeat(60000));
});
it('nested strong emph', async () => {
await test_pattern('*a **a '.repeat(5000) + 'b' + ' a** a*'.repeat(5000));
});

2
test/pathological.json

@ -1 +1 @@
{ "md5": "10730e0b6dec7355412bb632e9eb1d98" }
{ "md5": "c417101e12950cc61ff0a6d2cebb80e0" }

Loading…
Cancel
Save