Skip to content

Commit 8245d42

Browse files
authored
Update marked (#225096)
* Update marked
  This bumps our marked version from 4.x to 14.x. It fixes a number of bugs in markdown parsing; however, it is somewhat risky because we've skipped so many versions. Hoping to merge early in the iteration so we can catch any potential issues.
* Add highlighting
  Copying over marked-highlight for now.
* Fix newline in generated HTML
* Fix plaintext rendering of headers
1 parent 1dcacc6 commit 8245d42

File tree

13 files changed

+3456
-3964
lines changed

13 files changed

+3456
-3964
lines changed

src/vs/base/browser/markdownRenderer.ts

Lines changed: 51 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import { defaultGenerator } from 'vs/base/common/idGenerator';
1818
import { KeyCode } from 'vs/base/common/keyCodes';
1919
import { Lazy } from 'vs/base/common/lazy';
2020
import { DisposableStore, IDisposable, toDisposable } from 'vs/base/common/lifecycle';
21-
import { marked } from 'vs/base/common/marked/marked';
21+
import * as marked from 'vs/base/common/marked/marked';
2222
import { parse } from 'vs/base/common/marshalling';
2323
import { FileAccess, Schemas } from 'vs/base/common/network';
2424
import { cloneAndChange } from 'vs/base/common/objects';
@@ -45,7 +45,7 @@ export interface ISanitizerOptions {
4545
}
4646

4747
const defaultMarkedRenderers = Object.freeze({
48-
image: (href: string | null, title: string | null, text: string): string => {
48+
image: ({ href, title, text }: marked.Tokens.Image): string => {
4949
let dimensions: string[] = [];
5050
let attributes: string[] = [];
5151
if (href) {
@@ -64,11 +64,12 @@ const defaultMarkedRenderers = Object.freeze({
6464
return '<img ' + attributes.join(' ') + '>';
6565
},
6666

67-
paragraph: (text: string): string => {
68-
return `<p>${text}</p>`;
67+
paragraph(this: marked.Renderer, { tokens }: marked.Tokens.Paragraph): string {
68+
return `<p>${this.parser.parseInline(tokens)}</p>`;
6969
},
7070

71-
link: (href: string | null, title: string | null, text: string): string => {
71+
link(this: marked.Renderer, { href, title, tokens }: marked.Tokens.Link): string {
72+
let text = this.parser.parseInline(tokens);
7273
if (typeof href !== 'string') {
7374
return '';
7475
}
@@ -162,18 +163,18 @@ export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRende
162163
const syncCodeBlocks: [string, HTMLElement][] = [];
163164

164165
if (options.codeBlockRendererSync) {
165-
renderer.code = (code, lang) => {
166+
renderer.code = ({ text, lang }: marked.Tokens.Code) => {
166167
const id = defaultGenerator.nextId();
167-
const value = options.codeBlockRendererSync!(postProcessCodeBlockLanguageId(lang), code);
168+
const value = options.codeBlockRendererSync!(postProcessCodeBlockLanguageId(lang), text);
168169
syncCodeBlocks.push([id, value]);
169-
return `<div class="code" data-code="${id}">${escape(code)}</div>`;
170+
return `<div class="code" data-code="${id}">${escape(text)}</div>`;
170171
};
171172
} else if (options.codeBlockRenderer) {
172-
renderer.code = (code, lang) => {
173+
renderer.code = ({ text, lang }: marked.Tokens.Code) => {
173174
const id = defaultGenerator.nextId();
174-
const value = options.codeBlockRenderer!(postProcessCodeBlockLanguageId(lang), code);
175+
const value = options.codeBlockRenderer!(postProcessCodeBlockLanguageId(lang), text);
175176
codeBlocks.push(value.then(element => [id, element]));
176-
return `<div class="code" data-code="${id}">${escape(code)}</div>`;
177+
return `<div class="code" data-code="${id}">${escape(text)}</div>`;
177178
};
178179
}
179180

@@ -219,23 +220,16 @@ export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRende
219220
}
220221

221222
if (!markdown.supportHtml) {
222-
// TODO: Can we deprecated this in favor of 'supportHtml'?
223-
224-
// Use our own sanitizer so that we can let through only spans.
225-
// Otherwise, we'd be letting all html be rendered.
226-
// If we want to allow markdown permitted tags, then we can delete sanitizer and sanitize.
227-
// We always pass the output through dompurify after this so that we don't rely on
228-
// marked for sanitization.
229-
markedOptions.sanitizer = (html: string): string => {
223+
// Note: we always pass the output through dompurify after this so that we don't rely on
224+
// marked for real sanitization.
225+
renderer.html = ({ text }) => {
230226
if (options.sanitizerOptions?.replaceWithPlaintext) {
231-
return escape(html);
227+
return escape(text);
232228
}
233229

234-
const match = markdown.isTrusted ? html.match(/^(<span[^>]+>)|(<\/\s*span>)$/) : undefined;
235-
return match ? html : '';
230+
const match = markdown.isTrusted ? text.match(/^(<span[^>]+>)|(<\/\s*span>)$/) : undefined;
231+
return match ? text : '';
236232
};
237-
markedOptions.sanitize = true;
238-
markedOptions.silent = true;
239233
}
240234

241235
markedOptions.renderer = renderer;
@@ -261,7 +255,7 @@ export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRende
261255
const newTokens = fillInIncompleteTokens(tokens);
262256
renderedMarkdown = marked.parser(newTokens, opts);
263257
} else {
264-
renderedMarkdown = marked.parse(value, markedOptions);
258+
renderedMarkdown = marked.parse(value, { ...markedOptions, async: false });
265259
}
266260

267261
// Rewrite theme icons
@@ -553,7 +547,7 @@ export function renderMarkdownAsPlaintext(markdown: IMarkdownString, withCodeBlo
553547
value = `${value.substr(0, 100_000)}…`;
554548
}
555549

556-
const html = marked.parse(value, { renderer: withCodeBlocks ? plainTextWithCodeBlocksRenderer.value : plainTextRenderer.value }).replace(/&(#\d+|[a-zA-Z]+);/g, m => unescapeInfo.get(m) ?? m);
550+
const html = marked.parse(value, { async: false, renderer: withCodeBlocks ? plainTextWithCodeBlocksRenderer.value : plainTextRenderer.value }).replace(/&(#\d+|[a-zA-Z]+);/g, m => unescapeInfo.get(m) ?? m);
557551

558552
return sanitizeRenderedMarkdown({ isTrusted: false }, html).toString();
559553
}
@@ -570,73 +564,70 @@ const unescapeInfo = new Map<string, string>([
570564
function createRenderer(): marked.Renderer {
571565
const renderer = new marked.Renderer();
572566

573-
renderer.code = (code: string): string => {
574-
return code;
567+
renderer.code = ({ text }: marked.Tokens.Code): string => {
568+
return text;
575569
};
576-
renderer.blockquote = (quote: string): string => {
577-
return quote;
570+
renderer.blockquote = ({ text }: marked.Tokens.Blockquote): string => {
571+
return text + '\n';
578572
};
579-
renderer.html = (_html: string): string => {
573+
renderer.html = (_: marked.Tokens.HTML): string => {
580574
return '';
581575
};
582-
renderer.heading = (text: string, _level: 1 | 2 | 3 | 4 | 5 | 6, _raw: string): string => {
583-
return text + '\n';
576+
renderer.heading = function ({ tokens }: marked.Tokens.Heading): string {
577+
return this.parser.parseInline(tokens) + '\n';
584578
};
585579
renderer.hr = (): string => {
586580
return '';
587581
};
588-
renderer.list = (body: string, _ordered: boolean): string => {
589-
return body;
582+
renderer.list = function ({ items }: marked.Tokens.List): string {
583+
return items.map(x => this.listitem(x)).join('\n') + '\n';
590584
};
591-
renderer.listitem = (text: string): string => {
585+
renderer.listitem = ({ text }: marked.Tokens.ListItem): string => {
592586
return text + '\n';
593587
};
594-
renderer.paragraph = (text: string): string => {
595-
return text + '\n';
588+
renderer.paragraph = function ({ tokens }: marked.Tokens.Paragraph): string {
589+
return this.parser.parseInline(tokens) + '\n';
596590
};
597-
renderer.table = (header: string, body: string): string => {
598-
return header + body + '\n';
591+
renderer.table = function ({ header, rows }: marked.Tokens.Table): string {
592+
return header.map(cell => this.tablecell(cell)).join(' ') + '\n' + rows.map(cells => cells.map(cell => this.tablecell(cell)).join(' ')).join('\n') + '\n';
599593
};
600-
renderer.tablerow = (content: string): string => {
601-
return content;
594+
renderer.tablerow = ({ text }: marked.Tokens.TableRow): string => {
595+
return text;
602596
};
603-
renderer.tablecell = (content: string, _flags: {
604-
header: boolean;
605-
align: 'center' | 'left' | 'right' | null;
606-
}): string => {
607-
return content + ' ';
597+
renderer.tablecell = function ({ tokens }: marked.Tokens.TableCell): string {
598+
return this.parser.parseInline(tokens);
608599
};
609-
renderer.strong = (text: string): string => {
600+
renderer.strong = ({ text }: marked.Tokens.Strong): string => {
610601
return text;
611602
};
612-
renderer.em = (text: string): string => {
603+
renderer.em = ({ text }: marked.Tokens.Em): string => {
613604
return text;
614605
};
615-
renderer.codespan = (code: string): string => {
616-
return code;
606+
renderer.codespan = ({ text }: marked.Tokens.Codespan): string => {
607+
return text;
617608
};
618-
renderer.br = (): string => {
609+
renderer.br = (_: marked.Tokens.Br): string => {
619610
return '\n';
620611
};
621-
renderer.del = (text: string): string => {
612+
renderer.del = ({ text }: marked.Tokens.Del): string => {
622613
return text;
623614
};
624-
renderer.image = (_href: string, _title: string, _text: string): string => {
615+
renderer.image = (_: marked.Tokens.Image): string => {
625616
return '';
626617
};
627-
renderer.text = (text: string): string => {
618+
renderer.text = ({ text }: marked.Tokens.Text): string => {
628619
return text;
629620
};
630-
renderer.link = (_href: string, _title: string, text: string): string => {
621+
renderer.link = ({ text }: marked.Tokens.Link): string => {
631622
return text;
632623
};
633624
return renderer;
634625
}
635626
const plainTextRenderer = new Lazy<marked.Renderer>((withCodeBlocks?: boolean) => createRenderer());
636627
const plainTextWithCodeBlocksRenderer = new Lazy<marked.Renderer>(() => {
637628
const renderer = createRenderer();
638-
renderer.code = (code: string): string => {
639-
return `\n\`\`\`\n${code}\n\`\`\`\n`;
629+
renderer.code = ({ text }: marked.Tokens.Code): string => {
630+
return `\n\`\`\`\n${text}\n\`\`\`\n`;
640631
};
641632
return renderer;
642633
});
@@ -807,21 +798,14 @@ function fillInIncompleteTokensOnce(tokens: marked.TokensList): marked.TokensLis
807798
let newTokens: marked.Token[] | undefined;
808799
for (i = 0; i < tokens.length; i++) {
809800
const token = tokens[i];
810-
let codeblockStart: RegExpMatchArray | null;
811-
if (token.type === 'paragraph' && (codeblockStart = token.raw.match(/(\n|^)(````*)/))) {
812-
const codeblockLead = codeblockStart[2];
813-
// If the code block was complete, it would be in a type='code'
814-
newTokens = completeCodeBlock(tokens.slice(i), codeblockLead);
815-
break;
816-
}
817801

818802
if (token.type === 'paragraph' && token.raw.match(/(\n|^)\|/)) {
819803
newTokens = completeTable(tokens.slice(i));
820804
break;
821805
}
822806

823807
if (i === tokens.length - 1 && token.type === 'list') {
824-
const newListToken = completeListItemPattern(token);
808+
const newListToken = completeListItemPattern(token as marked.Tokens.List);
825809
if (newListToken) {
826810
newTokens = [newListToken];
827811
break;
@@ -830,7 +814,7 @@ function fillInIncompleteTokensOnce(tokens: marked.TokensList): marked.TokensLis
830814

831815
if (i === tokens.length - 1 && token.type === 'paragraph') {
832816
// Only operates on a single token, because any newline that follows this should break these patterns
833-
const newToken = completeSingleLinePattern(token);
817+
const newToken = completeSingleLinePattern(token as marked.Tokens.Paragraph);
834818
if (newToken) {
835819
newTokens = [newToken];
836820
break;
@@ -850,10 +834,6 @@ function fillInIncompleteTokensOnce(tokens: marked.TokensList): marked.TokensLis
850834
return null;
851835
}
852836

853-
function completeCodeBlock(tokens: marked.Token[], leader: string): marked.Token[] {
854-
const mergedRawText = mergeRawTokenText(tokens);
855-
return marked.lexer(mergedRawText + `\n${leader}`);
856-
}
857837

858838
function completeCodespan(token: marked.Token): marked.Token {
859839
return completeWithString(token, '`');

src/vs/base/common/marked/cgmanifest.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
"git": {
77
"name": "marked",
88
"repositoryUrl": "https://github.com/markedjs/marked",
9-
"commitHash": "7e2ef307846427650114591f9257b5545868e928"
9+
"commitHash": "7972d7f9b578a31b32f469c14fc97c39ceb2b6c6"
1010
}
1111
},
1212
"license": "MIT",
13-
"version": "4.1.0"
13+
"version": "14.0.0"
1414
}
1515
],
1616
"version": 1

0 commit comments

Comments
 (0)