diff --git a/src/Parser.events.spec.ts b/src/Parser.events.spec.ts
index 06e417a0..b8836d92 100644
--- a/src/Parser.events.spec.ts
+++ b/src/Parser.events.spec.ts
@@ -164,6 +164,18 @@ describe("Events", () => {
it("Scripts ending with <", () => runTest(""));
+ it("Special end tags ending with /> in script", () =>
+ runTest("
"));
+
+ it("Special end tags ending with /> in style", () =>
+ runTest("
"));
+
+ it("Special end tags ending with /> in title", () =>
+ runTest("
")).toMatchSnapshot();
@@ -128,6 +152,42 @@ describe("Tokenizer", () => {
expect(tokenize("≧̸")).toMatchSnapshot());
});
+ it("should close comments on --!>", () => {
+ expect(
+ tokenize("

-->"),
+ ).toMatchSnapshot();
+ });
+
+ it.each([
+ "script",
+ "style",
+ "title",
+ "textarea",
+ ])("should reset after an unclosed %s tag", (tag) => {
+ expect(
+ tokenize((tokenizer, events) => {
+ tokenizer.write(`<${tag}>body{color:red}`);
+ tokenizer.end();
+ events.length = 0;
+ tokenizer.reset();
+ tokenizer.write("
hello
");
+ tokenizer.end();
+ }).map(([event]) => event),
+ ).toEqual([
+ "onopentagname",
+ "onopentagend",
+ "ontext",
+ "onclosetag",
+ "onend",
+ ]);
+ });
+
+ it("should terminate XML processing instructions on ?>", () => {
+ expect(
+ tokenize(" injected ?>", { xmlMode: true }),
+ ).toMatchSnapshot();
+ });
+
it("should not lose data when pausing", () => {
const log: unknown[][] = [];
const tokenizer = new Tokenizer(
diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 0a2212d6..4c631f29 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -138,7 +138,7 @@ const Sequences = {
Empty: new Uint8Array(0),
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]>
- CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->`
+ CommentEnd: new Uint8Array([0x2d, 0x2d, 0x21, 0x3e]), // `--!>`
ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // ``.
+ this.cbs.oncomment(this.sectionStart, this.index, 2);
+
+ this.sequenceIndex = 0;
+ this.sectionStart = this.index + 1;
+ this.state = State.Text;
+ } else if (
+ this.currentSequence === Sequences.CommentEnd &&
+ this.sequenceIndex === this.currentSequence.length - 1 &&
+ c !== CharCodes.Gt
+ ) {
+ this.sequenceIndex = Number(c === CharCodes.Dash);
+ } else if (c === this.currentSequence[this.sequenceIndex]) {
if (++this.sequenceIndex === this.currentSequence.length) {
if (this.currentSequence === Sequences.CdataEnd) {
this.cbs.oncdata(this.sectionStart, this.index, 2);
} else {
- this.cbs.oncomment(this.sectionStart, this.index, 2);
+ this.cbs.oncomment(this.sectionStart, this.index, 3);
}
this.sequenceIndex = 0;
@@ -399,6 +418,7 @@ export default class Tokenizer {
this.sectionStart = this.index + 1;
} else if (c === CharCodes.Questionmark) {
this.state = State.InProcessingInstruction;
+ this.sequenceIndex = 0;
this.sectionStart = this.index + 1;
} else if (this.isTagStartChar(c)) {
const lower = c | 0x20;
@@ -443,7 +463,7 @@ export default class Tokenizer {
}
}
private stateInClosingTagName(c: number): void {
- if (c === CharCodes.Gt || isWhitespace(c)) {
+ if (isEndOfTagSection(c)) {
this.cbs.onclosetag(this.sectionStart, this.index);
this.sectionStart = -1;
this.state = State.AfterClosingTagName;
@@ -574,7 +594,25 @@ export default class Tokenizer {
}
}
private stateInProcessingInstruction(c: number): void {
- if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
+ if (this.xmlMode) {
+ if (c === CharCodes.Questionmark) {
+ // Remember that we just consumed `?`, so the next `>` closes the PI.
+ this.sequenceIndex = 1;
+ } else if (c === CharCodes.Gt && this.sequenceIndex === 1) {
+ this.cbs.onprocessinginstruction(
+ this.sectionStart,
+ this.index - 1,
+ );
+ this.sequenceIndex = 0;
+ this.state = State.Text;
+ this.sectionStart = this.index + 1;
+ } else {
+ // Keep scanning for the next `?`, which can start a closing `?>`.
+ this.sequenceIndex = Number(
+ this.fastForwardTo(CharCodes.Questionmark),
+ );
+ }
+ } else if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.onprocessinginstruction(this.sectionStart, this.index);
this.state = State.Text;
this.sectionStart = this.index + 1;
diff --git a/src/__snapshots__/Parser.events.spec.ts.snap b/src/__snapshots__/Parser.events.spec.ts.snap
index fd4f109d..ca7af1db 100644
--- a/src/__snapshots__/Parser.events.spec.ts.snap
+++ b/src/__snapshots__/Parser.events.spec.ts.snap
@@ -1825,6 +1825,274 @@ exports[`Events > Self-closing indices (#941) 1`] = `
]
`;
+exports[`Events > Special end tags ending with /> in script 1`] = `
+[
+ {
+ "$event": "opentagname",
+ "data": [
+ "script",
+ ],
+ "endIndex": 7,
+ "startIndex": 0,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "script",
+ {},
+ false,
+ ],
+ "endIndex": 7,
+ "startIndex": 0,
+ },
+ {
+ "$event": "text",
+ "data": [
+ "safe",
+ ],
+ "endIndex": 11,
+ "startIndex": 8,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "script",
+ false,
+ ],
+ "endIndex": 20,
+ "startIndex": 12,
+ },
+ {
+ "$event": "opentagname",
+ "data": [
+ "img",
+ ],
+ "endIndex": 26,
+ "startIndex": 21,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "img",
+ {},
+ false,
+ ],
+ "endIndex": 26,
+ "startIndex": 21,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "img",
+ true,
+ ],
+ "endIndex": 26,
+ "startIndex": 21,
+ },
+]
+`;
+
+exports[`Events > Special end tags ending with /> in style 1`] = `
+[
+ {
+ "$event": "opentagname",
+ "data": [
+ "style",
+ ],
+ "endIndex": 6,
+ "startIndex": 0,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "style",
+ {},
+ false,
+ ],
+ "endIndex": 6,
+ "startIndex": 0,
+ },
+ {
+ "$event": "text",
+ "data": [
+ "safe",
+ ],
+ "endIndex": 10,
+ "startIndex": 7,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "style",
+ false,
+ ],
+ "endIndex": 18,
+ "startIndex": 11,
+ },
+ {
+ "$event": "opentagname",
+ "data": [
+ "img",
+ ],
+ "endIndex": 24,
+ "startIndex": 19,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "img",
+ {},
+ false,
+ ],
+ "endIndex": 24,
+ "startIndex": 19,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "img",
+ true,
+ ],
+ "endIndex": 24,
+ "startIndex": 19,
+ },
+]
+`;
+
+exports[`Events > Special end tags ending with /> in textarea 1`] = `
+[
+ {
+ "$event": "opentagname",
+ "data": [
+ "textarea",
+ ],
+ "endIndex": 9,
+ "startIndex": 0,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "textarea",
+ {},
+ false,
+ ],
+ "endIndex": 9,
+ "startIndex": 0,
+ },
+ {
+ "$event": "text",
+ "data": [
+ "safe",
+ ],
+ "endIndex": 13,
+ "startIndex": 10,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "textarea",
+ false,
+ ],
+ "endIndex": 24,
+ "startIndex": 14,
+ },
+ {
+ "$event": "opentagname",
+ "data": [
+ "img",
+ ],
+ "endIndex": 30,
+ "startIndex": 25,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "img",
+ {},
+ false,
+ ],
+ "endIndex": 30,
+ "startIndex": 25,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "img",
+ true,
+ ],
+ "endIndex": 30,
+ "startIndex": 25,
+ },
+]
+`;
+
+exports[`Events > Special end tags ending with /> in title 1`] = `
+[
+ {
+ "$event": "opentagname",
+ "data": [
+ "title",
+ ],
+ "endIndex": 6,
+ "startIndex": 0,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "title",
+ {},
+ false,
+ ],
+ "endIndex": 6,
+ "startIndex": 0,
+ },
+ {
+ "$event": "text",
+ "data": [
+ "safe",
+ ],
+ "endIndex": 10,
+ "startIndex": 7,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "title",
+ false,
+ ],
+ "endIndex": 18,
+ "startIndex": 11,
+ },
+ {
+ "$event": "opentagname",
+ "data": [
+ "img",
+ ],
+ "endIndex": 24,
+ "startIndex": 19,
+ },
+ {
+ "$event": "opentag",
+ "data": [
+ "img",
+ {},
+ false,
+ ],
+ "endIndex": 24,
+ "startIndex": 19,
+ },
+ {
+ "$event": "closetag",
+ "data": [
+ "img",
+ true,
+ ],
+ "endIndex": 24,
+ "startIndex": 19,
+ },
+]
+`;
+
exports[`Events > Special special tags 1`] = `
[
{
diff --git a/src/__snapshots__/Tokenizer.spec.ts.snap b/src/__snapshots__/Tokenizer.spec.ts.snap
index 2e7e9f3b..d81daf97 100644
--- a/src/__snapshots__/Tokenizer.spec.ts.snap
+++ b/src/__snapshots__/Tokenizer.spec.ts.snap
@@ -1,5 +1,207 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+exports[`Tokenizer > should close comments on --!> 1`] = `
+[
+ [
+ "oncomment",
+ 4,
+ 8,
+ 3,
+ ],
+ [
+ "onopentagname",
+ 10,
+ 13,
+ ],
+ [
+ "onattribname",
+ 14,
+ 17,
+ ],
+ [
+ "onattribdata",
+ 18,
+ 19,
+ ],
+ [
+ "onattribend",
+ 1,
+ 19,
+ ],
+ [
+ "onattribname",
+ 20,
+ 27,
+ ],
+ [
+ "onattribdata",
+ 28,
+ 36,
+ ],
+ [
+ "onattribend",
+ 1,
+ 36,
+ ],
+ [
+ "onopentagend",
+ 36,
+ ],
+ [
+ "ontext",
+ 37,
+ 40,
+ ],
+ [
+ "onend",
+ ],
+]
+`;
+
+exports[`Tokenizer > should close special tags on end tags ending with /> > for script tag 1`] = `
+[
+ [
+ "onopentagname",
+ 1,
+ 7,
+ ],
+ [
+ "onopentagend",
+ 7,
+ ],
+ [
+ "ontext",
+ 8,
+ 12,
+ ],
+ [
+ "onclosetag",
+ 14,
+ 20,
+ ],
+ [
+ "onopentagname",
+ 23,
+ 26,
+ ],
+ [
+ "onopentagend",
+ 26,
+ ],
+ [
+ "onend",
+ ],
+]
+`;
+
+exports[`Tokenizer > should close special tags on end tags ending with /> > for style tag 1`] = `
+[
+ [
+ "onopentagname",
+ 1,
+ 6,
+ ],
+ [
+ "onopentagend",
+ 6,
+ ],
+ [
+ "ontext",
+ 7,
+ 11,
+ ],
+ [
+ "onclosetag",
+ 13,
+ 18,
+ ],
+ [
+ "onopentagname",
+ 21,
+ 24,
+ ],
+ [
+ "onopentagend",
+ 24,
+ ],
+ [
+ "onend",
+ ],
+]
+`;
+
+exports[`Tokenizer > should close special tags on end tags ending with /> > for textarea tag 1`] = `
+[
+ [
+ "onopentagname",
+ 1,
+ 9,
+ ],
+ [
+ "onopentagend",
+ 9,
+ ],
+ [
+ "ontext",
+ 10,
+ 14,
+ ],
+ [
+ "onclosetag",
+ 16,
+ 24,
+ ],
+ [
+ "onopentagname",
+ 27,
+ 30,
+ ],
+ [
+ "onopentagend",
+ 30,
+ ],
+ [
+ "onend",
+ ],
+]
+`;
+
+exports[`Tokenizer > should close special tags on end tags ending with /> > for title tag 1`] = `
+[
+ [
+ "onopentagname",
+ 1,
+ 6,
+ ],
+ [
+ "onopentagend",
+ 6,
+ ],
+ [
+ "ontext",
+ 7,
+ 11,
+ ],
+ [
+ "onclosetag",
+ 13,
+ 18,
+ ],
+ [
+ "onopentagname",
+ 21,
+ 24,
+ ],
+ [
+ "onopentagend",
+ 24,
+ ],
+ [
+ "onend",
+ ],
+]
+`;
+
exports[`Tokenizer > should correctly mark attributes > for double quotes attribute 1`] = `
[
[
@@ -698,6 +900,19 @@ exports[`Tokenizer > should support standard special tags > for normal xmp tag 1
]
`;
+exports[`Tokenizer > should terminate XML processing instructions on ?> 1`] = `
+[
+ [
+ "onprocessinginstruction",
+ 2,
+ 25,
+ ],
+ [
+ "onend",
+ ],
+]
+`;
+
exports[`Tokenizer > should treat html inside special tags as text > for div inside script tag 1`] = `
[
[
diff --git a/src/__snapshots__/WritableStream.spec.ts.snap b/src/__snapshots__/WritableStream.spec.ts.snap
index d2efaafd..fe2473e8 100644
--- a/src/__snapshots__/WritableStream.spec.ts.snap
+++ b/src/__snapshots__/WritableStream.spec.ts.snap
@@ -6,9 +6,9 @@ exports[`WritableStream > Atom feed 1`] = `
"$event": "processinginstruction",
"data": [
"?xml",
- "?xml version="1.0" encoding="utf-8"?",
+ "?xml version="1.0" encoding="utf-8"",
],
- "endIndex": 37,
+ "endIndex": 36,
"startIndex": 0,
},
{
@@ -18,7 +18,7 @@ exports[`WritableStream > Atom feed 1`] = `
",
],
"endIndex": 38,
- "startIndex": 38,
+ "startIndex": 37,
},
{
"$event": "comment",
@@ -1609,9 +1609,9 @@ exports[`WritableStream > RDF feed 1`] = `
"$event": "processinginstruction",
"data": [
"?xml",
- "?xml version="1.0" encoding="UTF-8"?",
+ "?xml version="1.0" encoding="UTF-8"",
],
- "endIndex": 37,
+ "endIndex": 36,
"startIndex": 0,
},
{
@@ -1621,7 +1621,7 @@ exports[`WritableStream > RDF feed 1`] = `
",
],
"endIndex": 38,
- "startIndex": 38,
+ "startIndex": 37,
},
{
"$event": "opentagname",
@@ -3609,9 +3609,9 @@ exports[`WritableStream > RSS feed 1`] = `
"$event": "processinginstruction",
"data": [
"?xml",
- "?xml version="1.0"?",
+ "?xml version="1.0"",
],
- "endIndex": 20,
+ "endIndex": 19,
"startIndex": 0,
},
{
@@ -3621,7 +3621,7 @@ exports[`WritableStream > RSS feed 1`] = `
",
],
"endIndex": 21,
- "startIndex": 21,
+ "startIndex": 20,
},
{
"$event": "comment",