diff --git a/src/Parser.events.spec.ts b/src/Parser.events.spec.ts index 06e417a0..b8836d92 100644 --- a/src/Parser.events.spec.ts +++ b/src/Parser.events.spec.ts @@ -164,6 +164,18 @@ describe("Events", () => { it("Scripts ending with <", () => runTest("")); + it("Special end tags ending with /> in script", () => + runTest("")); + + it("Special end tags ending with /> in style", () => + runTest("")); + + it("Special end tags ending with /> in title", () => + runTest("safe")); + + it("Special end tags ending with /> in textarea", () => + runTest("")); + it("CDATA more edge-cases", () => runTest("baz]]>", { recognizeCDATA: true })); diff --git a/src/Tokenizer.spec.ts b/src/Tokenizer.spec.ts index c893ffcb..c817bb37 100644 --- a/src/Tokenizer.spec.ts +++ b/src/Tokenizer.spec.ts @@ -2,7 +2,10 @@ import { describe, expect, it } from "vitest"; import { Tokenizer } from "./index.js"; import type { Callbacks } from "./Tokenizer.js"; -function tokenize(data: string, options = {}) { +function tokenize( + data: string | ((tokenizer: Tokenizer, log: unknown[][]) => void), + options = {}, +) { const log: unknown[][] = []; const tokenizer = new Tokenizer( options, @@ -17,8 +20,12 @@ function tokenize(data: string, options = {}) { ) as Callbacks, ); - tokenizer.write(data); - tokenizer.end(); + if (typeof data === "function") { + data(tokenizer, log); + } else { + tokenizer.write(data); + tokenizer.end(); + } return log; } @@ -82,6 +89,23 @@ describe("Tokenizer", () => { }); }); + describe("should close special tags on end tags ending with />", () => { + it("for script tag", () => { + expect(tokenize("")).toMatchSnapshot(); + }); + it("for style tag", () => { + expect(tokenize("")).toMatchSnapshot(); + }); + it("for title tag", () => { + expect(tokenize("safe")).toMatchSnapshot(); + }); + it("for textarea tag", () => { + expect( + tokenize(""), + ).toMatchSnapshot(); + }); + }); + describe("should correctly mark attributes", () => { it("for no value attribute", () => { expect(tokenize("
")).toMatchSnapshot(); @@ -128,6 +152,42 @@ describe("Tokenizer", () => { expect(tokenize("≧̸")).toMatchSnapshot()); }); + it("should close comments on --!>", () => { + expect( + tokenize("-->"), + ).toMatchSnapshot(); + }); + + it.each([ + "script", + "style", + "title", + "textarea", + ])("should reset after an unclosed %s tag", (tag) => { + expect( + tokenize((tokenizer, events) => { + tokenizer.write(`<${tag}>body{color:red}`); + tokenizer.end(); + events.length = 0; + tokenizer.reset(); + tokenizer.write("
hello
"); + tokenizer.end(); + }).map(([event]) => event), + ).toEqual([ + "onopentagname", + "onopentagend", + "ontext", + "onclosetag", + "onend", + ]); + }); + + it("should terminate XML processing instructions on ?>", () => { + expect( + tokenize(" injected ?>", { xmlMode: true }), + ).toMatchSnapshot(); + }); + it("should not lose data when pausing", () => { const log: unknown[][] = []; const tokenizer = new Tokenizer( diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 0a2212d6..4c631f29 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -138,7 +138,7 @@ const Sequences = { Empty: new Uint8Array(0), Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[ CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]> - CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->` + CommentEnd: new Uint8Array([0x2d, 0x2d, 0x21, 0x3e]), // `--!>` ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // ``. + this.cbs.oncomment(this.sectionStart, this.index, 2); + + this.sequenceIndex = 0; + this.sectionStart = this.index + 1; + this.state = State.Text; + } else if ( + this.currentSequence === Sequences.CommentEnd && + this.sequenceIndex === this.currentSequence.length - 1 && + c !== CharCodes.Gt + ) { + this.sequenceIndex = Number(c === CharCodes.Dash); + } else if (c === this.currentSequence[this.sequenceIndex]) { if (++this.sequenceIndex === this.currentSequence.length) { if (this.currentSequence === Sequences.CdataEnd) { this.cbs.oncdata(this.sectionStart, this.index, 2); } else { - this.cbs.oncomment(this.sectionStart, this.index, 2); + this.cbs.oncomment(this.sectionStart, this.index, 3); } this.sequenceIndex = 0; @@ -399,6 +418,7 @@ export default class Tokenizer { this.sectionStart = this.index + 1; } else if (c === CharCodes.Questionmark) { this.state = State.InProcessingInstruction; + this.sequenceIndex = 0; this.sectionStart = this.index + 1; } else if (this.isTagStartChar(c)) { const lower = c | 0x20; @@ -443,7 +463,7 @@ export default class Tokenizer { } } private stateInClosingTagName(c: number): void { - if (c === CharCodes.Gt || isWhitespace(c)) { + if (isEndOfTagSection(c)) { this.cbs.onclosetag(this.sectionStart, this.index); this.sectionStart = -1; this.state = State.AfterClosingTagName; @@ -574,7 +594,25 @@ export default class Tokenizer { } } private stateInProcessingInstruction(c: number): void { - if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { + if (this.xmlMode) { + if (c === CharCodes.Questionmark) { + // Remember that we just consumed `?`, so the next `>` closes the PI. + this.sequenceIndex = 1; + } else if (c === CharCodes.Gt && this.sequenceIndex === 1) { + this.cbs.onprocessinginstruction( + this.sectionStart, + this.index - 1, + ); + this.sequenceIndex = 0; + this.state = State.Text; + this.sectionStart = this.index + 1; + } else { + // Keep scanning for the next `?`, which can start a closing `?>`. + this.sequenceIndex = Number( + this.fastForwardTo(CharCodes.Questionmark), + ); + } + } else if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { this.cbs.onprocessinginstruction(this.sectionStart, this.index); this.state = State.Text; this.sectionStart = this.index + 1; diff --git a/src/__snapshots__/Parser.events.spec.ts.snap b/src/__snapshots__/Parser.events.spec.ts.snap index fd4f109d..ca7af1db 100644 --- a/src/__snapshots__/Parser.events.spec.ts.snap +++ b/src/__snapshots__/Parser.events.spec.ts.snap @@ -1825,6 +1825,274 @@ exports[`Events > Self-closing indices (#941) 1`] = ` ] `; +exports[`Events > Special end tags ending with /> in script 1`] = ` +[ + { + "$event": "opentagname", + "data": [ + "script", + ], + "endIndex": 7, + "startIndex": 0, + }, + { + "$event": "opentag", + "data": [ + "script", + {}, + false, + ], + "endIndex": 7, + "startIndex": 0, + }, + { + "$event": "text", + "data": [ + "safe", + ], + "endIndex": 11, + "startIndex": 8, + }, + { + "$event": "closetag", + "data": [ + "script", + false, + ], + "endIndex": 20, + "startIndex": 12, + }, + { + "$event": "opentagname", + "data": [ + "img", + ], + "endIndex": 26, + "startIndex": 21, + }, + { + "$event": "opentag", + "data": [ + "img", + {}, + false, + ], + "endIndex": 26, + "startIndex": 21, + }, + { + "$event": "closetag", + "data": [ + "img", + true, + ], + "endIndex": 26, + "startIndex": 21, + }, +] +`; + +exports[`Events > Special end tags ending with /> in style 1`] = ` +[ + { + "$event": "opentagname", + "data": [ + "style", + ], + "endIndex": 6, + "startIndex": 0, + }, + { + "$event": "opentag", + "data": [ + "style", + {}, + false, + ], + "endIndex": 6, + "startIndex": 0, + }, + { + "$event": "text", + "data": [ + "safe", + ], + "endIndex": 10, + "startIndex": 7, + }, + { + "$event": "closetag", + "data": [ + "style", + false, + ], + "endIndex": 18, + "startIndex": 11, + }, + { + "$event": "opentagname", + "data": [ + "img", + ], + "endIndex": 24, + "startIndex": 19, + }, + { + "$event": "opentag", + "data": [ + "img", + {}, + false, + ], + "endIndex": 24, + "startIndex": 19, + }, + { + "$event": "closetag", + "data": [ + "img", + true, + ], + "endIndex": 24, + "startIndex": 19, + }, +] +`; + +exports[`Events > Special end tags ending with /> in textarea 1`] = ` +[ + { + "$event": "opentagname", + "data": [ + "textarea", + ], + "endIndex": 9, + "startIndex": 0, + }, + { + "$event": "opentag", + "data": [ + "textarea", + {}, + false, + ], + "endIndex": 9, + "startIndex": 0, + }, + { + "$event": "text", + "data": [ + "safe", + ], + "endIndex": 13, + "startIndex": 10, + }, + { + "$event": "closetag", + "data": [ + "textarea", + false, + ], + "endIndex": 24, + "startIndex": 14, + }, + { + "$event": "opentagname", + "data": [ + "img", + ], + "endIndex": 30, + "startIndex": 25, + }, + { + "$event": "opentag", + "data": [ + "img", + {}, + false, + ], + "endIndex": 30, + "startIndex": 25, + }, + { + "$event": "closetag", + "data": [ + "img", + true, + ], + "endIndex": 30, + "startIndex": 25, + }, +] +`; + +exports[`Events > Special end tags ending with /> in title 1`] = ` +[ + { + "$event": "opentagname", + "data": [ + "title", + ], + "endIndex": 6, + "startIndex": 0, + }, + { + "$event": "opentag", + "data": [ + "title", + {}, + false, + ], + "endIndex": 6, + "startIndex": 0, + }, + { + "$event": "text", + "data": [ + "safe", + ], + "endIndex": 10, + "startIndex": 7, + }, + { + "$event": "closetag", + "data": [ + "title", + false, + ], + "endIndex": 18, + "startIndex": 11, + }, + { + "$event": "opentagname", + "data": [ + "img", + ], + "endIndex": 24, + "startIndex": 19, + }, + { + "$event": "opentag", + "data": [ + "img", + {}, + false, + ], + "endIndex": 24, + "startIndex": 19, + }, + { + "$event": "closetag", + "data": [ + "img", + true, + ], + "endIndex": 24, + "startIndex": 19, + }, +] +`; + exports[`Events > Special special tags 1`] = ` [ { diff --git a/src/__snapshots__/Tokenizer.spec.ts.snap b/src/__snapshots__/Tokenizer.spec.ts.snap index 2e7e9f3b..d81daf97 100644 --- a/src/__snapshots__/Tokenizer.spec.ts.snap +++ b/src/__snapshots__/Tokenizer.spec.ts.snap @@ -1,5 +1,207 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html +exports[`Tokenizer > should close comments on --!> 1`] = ` +[ + [ + "oncomment", + 4, + 8, + 3, + ], + [ + "onopentagname", + 10, + 13, + ], + [ + "onattribname", + 14, + 17, + ], + [ + "onattribdata", + 18, + 19, + ], + [ + "onattribend", + 1, + 19, + ], + [ + "onattribname", + 20, + 27, + ], + [ + "onattribdata", + 28, + 36, + ], + [ + "onattribend", + 1, + 36, + ], + [ + "onopentagend", + 36, + ], + [ + "ontext", + 37, + 40, + ], + [ + "onend", + ], +] +`; + +exports[`Tokenizer > should close special tags on end tags ending with /> > for script tag 1`] = ` +[ + [ + "onopentagname", + 1, + 7, + ], + [ + "onopentagend", + 7, + ], + [ + "ontext", + 8, + 12, + ], + [ + "onclosetag", + 14, + 20, + ], + [ + "onopentagname", + 23, + 26, + ], + [ + "onopentagend", + 26, + ], + [ + "onend", + ], +] +`; + +exports[`Tokenizer > should close special tags on end tags ending with /> > for style tag 1`] = ` +[ + [ + "onopentagname", + 1, + 6, + ], + [ + "onopentagend", + 6, + ], + [ + "ontext", + 7, + 11, + ], + [ + "onclosetag", + 13, + 18, + ], + [ + "onopentagname", + 21, + 24, + ], + [ + "onopentagend", + 24, + ], + [ + "onend", + ], +] +`; + +exports[`Tokenizer > should close special tags on end tags ending with /> > for textarea tag 1`] = ` +[ + [ + "onopentagname", + 1, + 9, + ], + [ + "onopentagend", + 9, + ], + [ + "ontext", + 10, + 14, + ], + [ + "onclosetag", + 16, + 24, + ], + [ + "onopentagname", + 27, + 30, + ], + [ + "onopentagend", + 30, + ], + [ + "onend", + ], +] +`; + +exports[`Tokenizer > should close special tags on end tags ending with /> > for title tag 1`] = ` +[ + [ + "onopentagname", + 1, + 6, + ], + [ + "onopentagend", + 6, + ], + [ + "ontext", + 7, + 11, + ], + [ + "onclosetag", + 13, + 18, + ], + [ + "onopentagname", + 21, + 24, + ], + [ + "onopentagend", + 24, + ], + [ + "onend", + ], +] +`; + exports[`Tokenizer > should correctly mark attributes > for double quotes attribute 1`] = ` [ [ @@ -698,6 +900,19 @@ exports[`Tokenizer > should support standard special tags > for normal xmp tag 1 ] `; +exports[`Tokenizer > should terminate XML processing instructions on ?> 1`] = ` +[ + [ + "onprocessinginstruction", + 2, + 25, + ], + [ + "onend", + ], +] +`; + exports[`Tokenizer > should treat html inside special tags as text > for div inside script tag 1`] = ` [ [ diff --git a/src/__snapshots__/WritableStream.spec.ts.snap b/src/__snapshots__/WritableStream.spec.ts.snap index d2efaafd..fe2473e8 100644 --- a/src/__snapshots__/WritableStream.spec.ts.snap +++ b/src/__snapshots__/WritableStream.spec.ts.snap @@ -6,9 +6,9 @@ exports[`WritableStream > Atom feed 1`] = ` "$event": "processinginstruction", "data": [ "?xml", - "?xml version="1.0" encoding="utf-8"?", + "?xml version="1.0" encoding="utf-8"", ], - "endIndex": 37, + "endIndex": 36, "startIndex": 0, }, { @@ -18,7 +18,7 @@ exports[`WritableStream > Atom feed 1`] = ` ", ], "endIndex": 38, - "startIndex": 38, + "startIndex": 37, }, { "$event": "comment", @@ -1609,9 +1609,9 @@ exports[`WritableStream > RDF feed 1`] = ` "$event": "processinginstruction", "data": [ "?xml", - "?xml version="1.0" encoding="UTF-8"?", + "?xml version="1.0" encoding="UTF-8"", ], - "endIndex": 37, + "endIndex": 36, "startIndex": 0, }, { @@ -1621,7 +1621,7 @@ exports[`WritableStream > RDF feed 1`] = ` ", ], "endIndex": 38, - "startIndex": 38, + "startIndex": 37, }, { "$event": "opentagname", @@ -3609,9 +3609,9 @@ exports[`WritableStream > RSS feed 1`] = ` "$event": "processinginstruction", "data": [ "?xml", - "?xml version="1.0"?", + "?xml version="1.0"", ], - "endIndex": 20, + "endIndex": 19, "startIndex": 0, }, { @@ -3621,7 +3621,7 @@ exports[`WritableStream > RSS feed 1`] = ` ", ], "endIndex": 21, - "startIndex": 21, + "startIndex": 20, }, { "$event": "comment",