From 0aa8694b46024185def1b60e91fdcf4446029819 Mon Sep 17 00:00:00 2001 From: Felix <188768+fb55@users.noreply.github.com> Date: Thu, 19 Mar 2026 07:53:47 +0000 Subject: [PATCH 1/2] fix(tokenizer): require full `-->` to close comments in xmlMode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In HTML, `<!-->` is a valid empty comment per the spec. However, in XML comments must be closed by `-->`, so `<!-->` should be treated as an opening — not a complete comment. Skip the short-comment shortcut when xmlMode is enabled. Fixes #1823 --- src/Tokenizer.spec.ts | 9 ++++ src/Tokenizer.ts | 5 +- src/__snapshots__/Tokenizer.spec.ts.snap | 66 ++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/src/Tokenizer.spec.ts b/src/Tokenizer.spec.ts index c817bb37..6a1792e1 100644 --- a/src/Tokenizer.spec.ts +++ b/src/Tokenizer.spec.ts @@ -158,6 +158,15 @@ describe("Tokenizer", () => { ).toMatchSnapshot(); }); + it("should not treat <!--> as a complete comment in xmlMode", () => { + expect( + tokenize( + "<root><item>start</item><!--><item>should ignore</item><--><item>end</item></root>", + { xmlMode: true }, + ), + ).toMatchSnapshot(); + }); + it.each([ "script", "style", diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 4c631f29..8e5bb2d3 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -622,8 +622,9 @@ export default class Tokenizer { if (c === CharCodes.Dash) { this.state = State.InCommentLike; this.currentSequence = Sequences.CommentEnd; - // Allow short comments (eg. <!-->) - this.sequenceIndex = 2; + // In HTML, `<!-->` is a valid empty comment. In XML, comments + // must be closed by `-->`, so we require the full sequence. + this.sequenceIndex = this.xmlMode ? 
0 : 2; this.sectionStart = this.index + 1; } else { this.state = State.InDeclaration; diff --git a/src/__snapshots__/Tokenizer.spec.ts.snap b/src/__snapshots__/Tokenizer.spec.ts.snap index d81daf97..8ad0e859 100644 --- a/src/__snapshots__/Tokenizer.spec.ts.snap +++ b/src/__snapshots__/Tokenizer.spec.ts.snap @@ -565,6 +565,72 @@ exports[`Tokenizer > should not lose data when pausing 1`] = ` ] `; +exports[`Tokenizer > should not treat <!--> as a complete comment in xmlMode 1`] = ` +[ + [ + "onopentagname", + 1, + 5, + ], + [ + "onopentagend", + 5, + ], + [ + "onopentagname", + 7, + 11, + ], + [ + "onopentagend", + 11, + ], + [ + "ontext", + 12, + 17, + ], + [ + "onclosetag", + 19, + 23, + ], + [ + "oncomment", + 28, + 58, + 2, + ], + [ + "onopentagname", + 60, + 64, + ], + [ + "onopentagend", + 64, + ], + [ + "ontext", + 65, + 68, + ], + [ + "onclosetag", + 70, + 74, + ], + [ + "onclosetag", + 77, + 81, + ], + [ + "onend", + ], +] +`; + exports[`Tokenizer > should support self-closing special tags > for self-closing script tag 1`] = ` [ [ From ad9d01ea74ff853c46ff6fc4d46c98611ddadf95 Mon Sep 17 00:00:00 2001 From: Felix <188768+fb55@users.noreply.github.com> Date: Thu, 19 Mar 2026 07:55:27 +0000 Subject: [PATCH 2/2] style: use block comment to satisfy eslint multiline-comment-style --- src/Tokenizer.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 8e5bb2d3..83028a03 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -622,8 +622,10 @@ export default class Tokenizer { if (c === CharCodes.Dash) { this.state = State.InCommentLike; this.currentSequence = Sequences.CommentEnd; - // In HTML, `<!-->` is a valid empty comment. In XML, comments - // must be closed by `-->`, so we require the full sequence. + /* + * In HTML, `<!-->` is a valid empty comment. In XML, comments + * must be closed by `-->`, so we require the full sequence. + */ this.sequenceIndex = this.xmlMode ? 
0 : 2; this.sectionStart = this.index + 1; } else {