Browse Source

fix quadratic complexity in reference parser (#1004)

pull/1009/head
Alex Kocharin 2 months ago
committed by GitHub
parent
commit
de814cae73
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 1
      lib/helpers/parse_link_destination.mjs
  2. 66
      lib/helpers/parse_link_title.mjs
  3. 117
      lib/rules_block/reference.mjs
  4. 34
      test/fixtures/markdown-it/commonmark_extras.txt

1
lib/helpers/parse_link_destination.mjs

@ -10,7 +10,6 @@ export default function parseLinkDestination (str, start, max) {
const result = {
ok: false,
pos: 0,
lines: 0,
str: ''
}

66
lib/helpers/parse_link_title.mjs

@ -3,50 +3,64 @@
import { unescapeAll } from '../common/utils.mjs'
export default function parseLinkTitle (str, start, max) {
let code, marker
let lines = 0
// Parse a link title within `str` in the [start, max) range (`max` is exclusive),
// or continue a previous parse if `prev_state` is given (the state object returned by the previous call).
//
export default function parseLinkTitle (str, start, max, prev_state) {
let code
let pos = start
const result = {
const state = {
// if `true`, this is a valid link title
ok: false,
// if `true`, this link title can be continued on the next line
can_continue: false,
// if `ok`, it's the position of the first character after the closing marker
pos: 0,
lines: 0,
str: ''
// if `ok`, it's the unescaped title
str: '',
// expected closing marker character code
marker: 0
}
if (pos >= max) { return result }
if (prev_state) {
// this is a continuation of a previous parseLinkTitle call on the next line,
// used in reference links only
state.str = prev_state.str
state.marker = prev_state.marker
} else {
if (pos >= max) { return state }
marker = str.charCodeAt(pos)
let marker = str.charCodeAt(pos)
if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return state }
if (marker !== 0x22 /* " */ && marker !== 0x27 /* ' */ && marker !== 0x28 /* ( */) { return result }
start++
pos++
pos++
// if opening marker is "(", switch it to closing marker ")"
if (marker === 0x28) { marker = 0x29 }
// if opening marker is "(", switch it to closing marker ")"
if (marker === 0x28) { marker = 0x29 }
state.marker = marker
}
while (pos < max) {
code = str.charCodeAt(pos)
if (code === marker) {
result.pos = pos + 1
result.lines = lines
result.str = unescapeAll(str.slice(start + 1, pos))
result.ok = true
return result
} else if (code === 0x28 /* ( */ && marker === 0x29 /* ) */) {
return result
} else if (code === 0x0A) {
lines++
if (code === state.marker) {
state.pos = pos + 1
state.str += unescapeAll(str.slice(start, pos))
state.ok = true
return state
} else if (code === 0x28 /* ( */ && state.marker === 0x29 /* ) */) {
return state
} else if (code === 0x5C /* \ */ && pos + 1 < max) {
pos++
if (str.charCodeAt(pos) === 0x0A) {
lines++
}
}
pos++
}
return result
// no closing marker found, but this link title may continue on the next line (for references)
state.can_continue = true
state.str += unescapeAll(str.slice(start, pos))
return state
}

117
lib/rules_block/reference.mjs

@ -1,8 +1,6 @@
import { isSpace, normalizeReference } from '../common/utils.mjs'
export default function reference (state, startLine, _endLine, silent) {
let lines = 0
let pos = state.bMarks[startLine] + state.tShift[startLine]
let max = state.eMarks[startLine]
let nextLine = startLine + 1
@ -12,45 +10,53 @@ export default function reference (state, startLine, _endLine, silent) {
if (state.src.charCodeAt(pos) !== 0x5B/* [ */) { return false }
// Simple check to quickly interrupt scan on [link](url) at the start of line.
// Can be useful in practice: https://github.com/markdown-it/markdown-it/issues/54
while (++pos < max) {
if (state.src.charCodeAt(pos) === 0x5D /* ] */ &&
state.src.charCodeAt(pos - 1) !== 0x5C/* \ */) {
if (pos + 1 === max) { return false }
if (state.src.charCodeAt(pos + 1) !== 0x3A/* : */) { return false }
break
}
}
const endLine = state.lineMax
function getNextLine (nextLine) {
const endLine = state.lineMax
// jump line-by-line until empty one or EOF
const terminatorRules = state.md.block.ruler.getRules('reference')
if (nextLine >= endLine || state.isEmpty(nextLine)) {
// empty line or end of input
return null
}
const oldParentType = state.parentType
state.parentType = 'reference'
let isContinuation = false
for (; nextLine < endLine && !state.isEmpty(nextLine); nextLine++) {
// this would be a code block normally, but after paragraph
// it's considered a lazy continuation regardless of what's there
if (state.sCount[nextLine] - state.blkIndent > 3) { continue }
if (state.sCount[nextLine] - state.blkIndent > 3) { isContinuation = true }
// quirk for blockquotes, this line should already be checked by that rule
if (state.sCount[nextLine] < 0) { continue }
// Some tags can terminate paragraph without empty line.
let terminate = false
for (let i = 0, l = terminatorRules.length; i < l; i++) {
if (terminatorRules[i](state, nextLine, endLine, true)) {
terminate = true
break
if (state.sCount[nextLine] < 0) { isContinuation = true }
if (!isContinuation) {
const terminatorRules = state.md.block.ruler.getRules('reference')
const oldParentType = state.parentType
state.parentType = 'reference'
// Some tags can terminate paragraph without empty line.
let terminate = false
for (let i = 0, l = terminatorRules.length; i < l; i++) {
if (terminatorRules[i](state, nextLine, endLine, true)) {
terminate = true
break
}
}
state.parentType = oldParentType
if (terminate) {
// terminated by another block
return null
}
}
if (terminate) { break }
const pos = state.bMarks[nextLine] + state.tShift[nextLine]
const max = state.eMarks[nextLine]
// max + 1 explicitly includes the newline
return state.src.slice(pos, max + 1)
}
const str = state.getLines(startLine, nextLine, state.blkIndent, false).trim()
let str = state.src.slice(pos, max + 1)
max = str.length
let labelEnd = -1
@ -62,11 +68,21 @@ export default function reference (state, startLine, _endLine, silent) {
labelEnd = pos
break
} else if (ch === 0x0A /* \n */) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
} else if (ch === 0x5C /* \ */) {
pos++
if (pos < max && str.charCodeAt(pos) === 0x0A) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
}
}
}
@ -78,7 +94,12 @@ export default function reference (state, startLine, _endLine, silent) {
for (pos = labelEnd + 2; pos < max; pos++) {
const ch = str.charCodeAt(pos)
if (ch === 0x0A) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
} else if (isSpace(ch)) {
/* eslint no-empty:0 */
} else {
@ -95,11 +116,10 @@ export default function reference (state, startLine, _endLine, silent) {
if (!state.md.validateLink(href)) { return false }
pos = destRes.pos
lines += destRes.lines
// save cursor state; we may need to roll back to it later
const destEndPos = pos
const destEndLineNo = lines
const destEndLineNo = nextLine
// [label]: destination 'title'
// ^^^ skipping those spaces
@ -107,7 +127,12 @@ export default function reference (state, startLine, _endLine, silent) {
for (; pos < max; pos++) {
const ch = str.charCodeAt(pos)
if (ch === 0x0A) {
lines++
const lineContent = getNextLine(nextLine)
if (lineContent !== null) {
str += lineContent
max = str.length
nextLine++
}
} else if (isSpace(ch)) {
/* eslint no-empty:0 */
} else {
@ -117,17 +142,25 @@ export default function reference (state, startLine, _endLine, silent) {
// [label]: destination 'title'
// ^^^^^^^ parse this
const titleRes = state.md.helpers.parseLinkTitle(str, pos, max)
let titleRes = state.md.helpers.parseLinkTitle(str, pos, max)
while (titleRes.can_continue) {
const lineContent = getNextLine(nextLine)
if (lineContent === null) break
str += lineContent
pos = max
max = str.length
nextLine++
titleRes = state.md.helpers.parseLinkTitle(str, pos, max, titleRes)
}
let title
if (pos < max && start !== pos && titleRes.ok) {
title = titleRes.str
pos = titleRes.pos
lines += titleRes.lines
} else {
title = ''
pos = destEndPos
lines = destEndLineNo
nextLine = destEndLineNo
}
// skip trailing spaces until the rest of the line
@ -143,7 +176,7 @@ export default function reference (state, startLine, _endLine, silent) {
// but it could still be a valid reference if we roll back
title = ''
pos = destEndPos
lines = destEndLineNo
nextLine = destEndLineNo
while (pos < max) {
const ch = str.charCodeAt(pos)
if (!isSpace(ch)) { break }
@ -174,8 +207,6 @@ export default function reference (state, startLine, _endLine, silent) {
state.env.references[label] = { title, href }
}
state.parentType = oldParentType
state.line = startLine + lines + 1
state.line = nextLine
return true
}

34
test/fixtures/markdown-it/commonmark_extras.txt

@ -49,6 +49,40 @@ Reference labels: support ligatures (equivalent according to unicode case foldin
<p><a href="/url">fffifl</a></p>
.
Reference can be interrupted by other rules
.
[foo]: /url 'title
- - -
'
[foo]
.
<p>[foo]: /url 'title</p>
<hr>
<p>’</p>
<p>[foo]</p>
.
Escape character in link reference title doesn't escape newlines
.
[foo]: /url "
hello
\
\
\
world
"
[foo]
.
<p><a href="/url" title="
hello
\
\
\
world
">foo</a></p>
.
Issue #35. `<` should work as punctuation
.

Loading…
Cancel
Save