From 30fb3c843365c8dfd923ab7a0c173ccdbd439e20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 10 Nov 2021 20:20:18 +0000
Subject: [PATCH 01/12] refactor(tokenizer): Introduce sequences

---
 src/Tokenizer.ts | 104 ++++++++++++++++++++---------------------------
 1 file changed, 45 insertions(+), 59 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 1651b2e3..abace737 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -76,12 +76,6 @@ const enum State {
     AfterComment2,
 
     // Cdata
-    BeforeCdata1, // [
-    BeforeCdata2, // C
-    BeforeCdata3, // D
-    BeforeCdata4, // A
-    BeforeCdata5, // T
-    BeforeCdata6, // A
     InCdata, // [
     AfterCdata1, // ]
     AfterCdata2, // ]
@@ -126,6 +120,9 @@ const enum State {
     InNamedEntity,
     InNumericEntity,
     InHexEntity, // X
+
+    // Sequences
+    CDATASequence,
 }
 
 const enum Special {
@@ -183,31 +180,17 @@ function ifElseState(upper: string, SUCCESS: State, FAILURE: State) {
     };
 }
 
-const stateBeforeCdata1 = ifElseState(
-    "C",
-    State.BeforeCdata2,
-    State.InDeclaration
-);
-const stateBeforeCdata2 = ifElseState(
-    "D",
-    State.BeforeCdata3,
-    State.InDeclaration
-);
-const stateBeforeCdata3 = ifElseState(
-    "A",
-    State.BeforeCdata4,
-    State.InDeclaration
-);
-const stateBeforeCdata4 = ifElseState(
-    "T",
-    State.BeforeCdata5,
-    State.InDeclaration
-);
-const stateBeforeCdata5 = ifElseState(
-    "A",
-    State.BeforeCdata6,
-    State.InDeclaration
-);
+const SEQUENCES = {
+    CDATA: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
+    SCRIPT: new Uint16Array([0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `script`
+    SCRIPT_END: new Uint16Array([
+        0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
+    ]), // `</script`
+    STYLE: new Uint16Array([0x73, 0x74, 0x79, 0x6c, 0x65]), // `style`
+    TITLE: new Uint16Array([0x74, 0x69, 0x74, 0x6c, 0x65]), // `title`
+
+    CDATA_END: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
+};
 
 const stateBeforeScript1 = ifElseState(
     "R",
@@ -376,6 +359,21 @@ export default class Tokenizer {
             this.sectionStart = this._index;
         }
     }
+    private sequenceIndex = 0;
+    private stateCDATASequence(c: number) {
+        if (c === SEQUENCES.CDATA[this.sequenceIndex]) {
+            if (++this.sequenceIndex === SEQUENCES.CDATA.length) {
+                this._state = State.InCdata;
+                this.sectionStart = this._index + 1;
+                return;
+            }
+        } else {
+            this.sequenceIndex = 0;
+            this._state = State.InDeclaration;
+            this.stateInDeclaration(c); // Reconsume the character
+        }
+    }
+
     /**
      * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
      *
@@ -570,12 +568,15 @@ export default class Tokenizer {
         }
     }
     private stateBeforeDeclaration(c: number) {
-        this._state =
-            c === CharCodes.OpeningSquareBracket
-                ? State.BeforeCdata1
-                : c === CharCodes.Dash
-                ? State.BeforeComment
-                : State.InDeclaration;
+        if (c === CharCodes.OpeningSquareBracket) {
+            this._state = State.CDATASequence;
+            this.sequenceIndex = 0;
+        } else {
+            this._state =
+                c === CharCodes.Dash
+                    ? State.BeforeComment
+                    : State.InDeclaration;
+        }
     }
     private stateInDeclaration(c: number) {
         if (c === CharCodes.Gt) {
@@ -631,15 +632,6 @@ export default class Tokenizer {
         }
         // Else: stay in AFTER_COMMENT_2 (`--->`)
     }
-    private stateBeforeCdata6(c: number) {
-        if (c === CharCodes.OpeningSquareBracket) {
-            this._state = State.InCdata;
-            this.sectionStart = this._index + 1;
-        } else {
-            this._state = State.InDeclaration;
-            this.stateInDeclaration(c);
-        }
-    }
     private stateInCdata(c: number) {
         if (c === CharCodes.ClosingSquareBracket)
             this._state = State.AfterCdata1;
@@ -843,16 +835,22 @@ export default class Tokenizer {
         }
     }
 
+    private shouldContinue() {
+        return this._index < this.buffer.length && this.running;
+    }
+
     /**
      * Iterates through the buffer, calling the function corresponding to the current state.
      *
      * States that are more likely to be hit are higher up, as a performance improvement.
      */
     private parse() {
-        while (this._index < this.buffer.length && this.running) {
+        while (this.shouldContinue()) {
             const c = this.buffer.charCodeAt(this._index);
             if (this._state === State.Text) {
                 this.stateText(c);
+            } else if (this._state === State.CDATASequence) {
+                this.stateCDATASequence(c);
             } else if (this._state === State.InAttributeValueDq) {
                 this.stateInAttributeValueDoubleQuotes(c);
             } else if (this._state === State.InAttributeName) {
@@ -959,24 +957,12 @@ export default class Tokenizer {
                 this.stateInProcessingInstruction(c);
             } else if (this._state === State.InNamedEntity) {
                 this.stateInNamedEntity(c);
-            } else if (this._state === State.BeforeCdata1) {
-                stateBeforeCdata1(this, c);
             } else if (this._state === State.BeforeEntity) {
                 this.stateBeforeEntity(c);
-            } else if (this._state === State.BeforeCdata2) {
-                stateBeforeCdata2(this, c);
-            } else if (this._state === State.BeforeCdata3) {
-                stateBeforeCdata3(this, c);
             } else if (this._state === State.AfterCdata1) {
                 this.stateAfterCdata1(c);
             } else if (this._state === State.AfterCdata2) {
                 this.stateAfterCdata2(c);
-            } else if (this._state === State.BeforeCdata4) {
-                stateBeforeCdata4(this, c);
-            } else if (this._state === State.BeforeCdata5) {
-                stateBeforeCdata5(this, c);
-            } else if (this._state === State.BeforeCdata6) {
-                this.stateBeforeCdata6(c);
             } else if (this._state === State.InHexEntity) {
                 this.stateInHexEntity(c);
             } else if (this._state === State.InNumericEntity) {

From 3a55fa0983c69f31fd984b96ddd82c27bc1621c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 10 Nov 2021 20:48:09 +0000
Subject: [PATCH 02/12] Add `specialStartSequence`

---
 src/Tokenizer.ts | 147 +++++++++++++++++------------------------------
 1 file changed, 52 insertions(+), 95 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index abace737..a4a7b92b 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -84,32 +84,18 @@ const enum State {
     BeforeSpecialS, // S
     BeforeSpecialSEnd, // S
 
-    BeforeScript1, // C
-    BeforeScript2, // R
-    BeforeScript3, // I
-    BeforeScript4, // P
-    BeforeScript5, // T
     AfterScript1, // C
     AfterScript2, // R
     AfterScript3, // I
     AfterScript4, // P
     AfterScript5, // T
 
-    BeforeStyle1, // T
-    BeforeStyle2, // Y
-    BeforeStyle3, // L
-    BeforeStyle4, // E
     AfterStyle1, // T
     AfterStyle2, // Y
     AfterStyle3, // L
     AfterStyle4, // E
 
-    BeforeSpecialT, // T
     BeforeSpecialTEnd, // T
-    BeforeTitle1, // I
-    BeforeTitle2, // T
-    BeforeTitle3, // L
-    BeforeTitle4, // E
     AfterTitle1, // I
     AfterTitle2, // T
     AfterTitle3, // L
@@ -123,6 +109,8 @@ const enum State {
 
     // Sequences
     CDATASequence,
+    SpecialStartSequence,
+    SpecialEndSequence,
 }
 
 const enum Special {
@@ -192,49 +180,15 @@ const SEQUENCES = {
     CDATA_END: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
 };
 
-const stateBeforeScript1 = ifElseState(
-    "R",
-    State.BeforeScript2,
-    State.InTagName
-);
-const stateBeforeScript2 = ifElseState(
-    "I",
-    State.BeforeScript3,
-    State.InTagName
-);
-const stateBeforeScript3 = ifElseState(
-    "P",
-    State.BeforeScript4,
-    State.InTagName
-);
-const stateBeforeScript4 = ifElseState(
-    "T",
-    State.BeforeScript5,
-    State.InTagName
-);
-
 const stateAfterScript1 = ifElseState("R", State.AfterScript2, State.Text);
 const stateAfterScript2 = ifElseState("I", State.AfterScript3, State.Text);
 const stateAfterScript3 = ifElseState("P", State.AfterScript4, State.Text);
 const stateAfterScript4 = ifElseState("T", State.AfterScript5, State.Text);
 
-const stateBeforeStyle1 = ifElseState("Y", State.BeforeStyle2, State.InTagName);
-const stateBeforeStyle2 = ifElseState("L", State.BeforeStyle3, State.InTagName);
-const stateBeforeStyle3 = ifElseState("E", State.BeforeStyle4, State.InTagName);
-
 const stateAfterStyle1 = ifElseState("Y", State.AfterStyle2, State.Text);
 const stateAfterStyle2 = ifElseState("L", State.AfterStyle3, State.Text);
 const stateAfterStyle3 = ifElseState("E", State.AfterStyle4, State.Text);
 
-const stateBeforeSpecialT = ifElseState(
-    "I",
-    State.BeforeTitle1,
-    State.InTagName
-);
-const stateBeforeTitle1 = ifElseState("T", State.BeforeTitle2, State.InTagName);
-const stateBeforeTitle2 = ifElseState("L", State.BeforeTitle3, State.InTagName);
-const stateBeforeTitle3 = ifElseState("E", State.BeforeTitle4, State.InTagName);
-
 const stateBeforeSpecialTEnd = ifElseState("I", State.AfterTitle1, State.Text);
 const stateAfterTitle1 = ifElseState("T", State.AfterTitle2, State.Text);
 const stateAfterTitle2 = ifElseState("L", State.AfterTitle3, State.Text);
@@ -359,16 +313,37 @@ export default class Tokenizer {
             this.sectionStart = this._index;
         }
     }
+
+    private currentSequence!: Uint16Array;
     private sequenceIndex = 0;
+    private stateSpecialStartSequence(c: number) {
+        const isMatch =
+            this.sequenceIndex === this.currentSequence.length
+                ? // If we are at the end of the sequence, make sure the tag name has ended
+                  c === CharCodes.Slash || c === CharCodes.Gt || whitespace(c)
+                : // Otherwise, do a case-insensitive comparison
+                  (c | 0x20) === this.currentSequence[this.sequenceIndex];
+
+        if (!isMatch) {
+            this.special = Special.None;
+        } else if (this.sequenceIndex < this.currentSequence.length) {
+            this.sequenceIndex++;
+            return;
+        }
+
+        this._state = State.InTagName;
+        this.stateInTagName(c); // Reconsume the character
+    }
+
+    private stateSpecialEndSequence(_c: number) {}
+
     private stateCDATASequence(c: number) {
         if (c === SEQUENCES.CDATA[this.sequenceIndex]) {
             if (++this.sequenceIndex === SEQUENCES.CDATA.length) {
                 this._state = State.InCdata;
                 this.sectionStart = this._index + 1;
-                return;
             }
         } else {
-            this.sequenceIndex = 0;
             this._state = State.InDeclaration;
             this.stateInDeclaration(c); // Reconsume the character
         }
@@ -410,15 +385,22 @@ export default class Tokenizer {
         } else if (!this.isTagStartChar(c)) {
             this._state = State.Text;
         } else {
-            this._state =
-                !this.xmlMode &&
-                (c === CharCodes.LowerS || c === CharCodes.UpperS)
-                    ? State.BeforeSpecialS
-                    : !this.xmlMode &&
-                      (c === CharCodes.LowerT || c === CharCodes.UpperT)
-                    ? State.BeforeSpecialT
-                    : State.InTagName;
             this.sectionStart = this._index;
+            if (
+                !this.xmlMode &&
+                (c === CharCodes.LowerT || c === CharCodes.UpperT)
+            ) {
+                this.special = Special.Title;
+                this.currentSequence = SEQUENCES.TITLE;
+                this.sequenceIndex = 1;
+                this._state = State.SpecialStartSequence;
+            } else {
+                this._state =
+                    !this.xmlMode &&
+                    (c === CharCodes.LowerS || c === CharCodes.UpperS)
+                        ? State.BeforeSpecialS
+                        : State.InTagName;
+            }
         }
     }
     private stateInTagName(c: number) {
@@ -656,9 +638,15 @@ export default class Tokenizer {
     }
     private stateBeforeSpecialS(c: number) {
         if (c === CharCodes.LowerC || c === CharCodes.UpperC) {
-            this._state = State.BeforeScript1;
+            this.special = Special.Script;
+            this.currentSequence = SEQUENCES.SCRIPT;
+            this.sequenceIndex = 2;
+            this._state = State.SpecialStartSequence;
         } else if (c === CharCodes.LowerT || c === CharCodes.UpperT) {
-            this._state = State.BeforeStyle1;
+            this.special = Special.Style;
+            this.currentSequence = SEQUENCES.STYLE;
+            this.sequenceIndex = 2;
+            this._state = State.SpecialStartSequence;
         } else {
             this._state = State.InTagName;
             this.stateInTagName(c); // Consume the token again
@@ -677,13 +665,6 @@ export default class Tokenizer {
             this._state = State.AfterStyle1;
         } else this._state = State.Text;
     }
-    private stateBeforeSpecialLast(c: number, special: Special) {
-        if (c === CharCodes.Slash || c === CharCodes.Gt || whitespace(c)) {
-            this.special = special;
-        }
-        this._state = State.InTagName;
-        this.stateInTagName(c); // Consume the token again
-    }
     private stateAfterSpecialLast(c: number, sectionStartOffset: number) {
         if (c === CharCodes.Gt || whitespace(c)) {
             this.sectionStart = this._index - sectionStartOffset;
@@ -849,6 +830,10 @@ export default class Tokenizer {
             const c = this.buffer.charCodeAt(this._index);
             if (this._state === State.Text) {
                 this.stateText(c);
+            } else if (this._state === State.SpecialStartSequence) {
+                this.stateSpecialStartSequence(c);
+            } else if (this._state === State.SpecialEndSequence) {
+                this.stateSpecialEndSequence(c);
             } else if (this._state === State.CDATASequence) {
                 this.stateCDATASequence(c);
             } else if (this._state === State.InAttributeValueDq) {
@@ -903,30 +888,12 @@ export default class Tokenizer {
                 stateAfterScript2(this, c);
             } else if (this._state === State.AfterScript3) {
                 stateAfterScript3(this, c);
-            } else if (this._state === State.BeforeScript1) {
-                stateBeforeScript1(this, c);
-            } else if (this._state === State.BeforeScript2) {
-                stateBeforeScript2(this, c);
-            } else if (this._state === State.BeforeScript3) {
-                stateBeforeScript3(this, c);
-            } else if (this._state === State.BeforeScript4) {
-                stateBeforeScript4(this, c);
-            } else if (this._state === State.BeforeScript5) {
-                this.stateBeforeSpecialLast(c, Special.Script);
             } else if (this._state === State.AfterScript4) {
                 stateAfterScript4(this, c);
             } else if (this._state === State.AfterScript5) {
                 this.stateAfterSpecialLast(c, 6);
-            } else if (this._state === State.BeforeStyle1) {
-                stateBeforeStyle1(this, c);
             } else if (this._state === State.InCdata) {
                 this.stateInCdata(c);
-            } else if (this._state === State.BeforeStyle2) {
-                stateBeforeStyle2(this, c);
-            } else if (this._state === State.BeforeStyle3) {
-                stateBeforeStyle3(this, c);
-            } else if (this._state === State.BeforeStyle4) {
-                this.stateBeforeSpecialLast(c, Special.Style);
             } else if (this._state === State.AfterStyle1) {
                 stateAfterStyle1(this, c);
             } else if (this._state === State.AfterStyle2) {
@@ -935,16 +902,6 @@ export default class Tokenizer {
                 stateAfterStyle3(this, c);
             } else if (this._state === State.AfterStyle4) {
                 this.stateAfterSpecialLast(c, 5);
-            } else if (this._state === State.BeforeSpecialT) {
-                stateBeforeSpecialT(this, c);
-            } else if (this._state === State.BeforeTitle1) {
-                stateBeforeTitle1(this, c);
-            } else if (this._state === State.BeforeTitle2) {
-                stateBeforeTitle2(this, c);
-            } else if (this._state === State.BeforeTitle3) {
-                stateBeforeTitle3(this, c);
-            } else if (this._state === State.BeforeTitle4) {
-                this.stateBeforeSpecialLast(c, Special.Title);
             } else if (this._state === State.AfterTitle1) {
                 stateAfterTitle1(this, c);
             } else if (this._state === State.AfterTitle2) {

From 422a0b743a5e9c353e343f2febd9d660b810dc40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 10 Nov 2021 21:54:29 +0000
Subject: [PATCH 03/12] Add `stateInCommentLike`

---
 src/Tokenizer.ts | 150 ++++++++++++++++++++++-------------------------
 1 file changed, 71 insertions(+), 79 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index a4a7b92b..9e71740a 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -70,15 +70,8 @@ const enum State {
 
     // Comments
     BeforeComment,
-    InComment,
     InSpecialComment,
-    AfterComment1,
-    AfterComment2,
-
-    // Cdata
-    InCdata, // [
-    AfterCdata1, // ]
-    AfterCdata2, // ]
+    InCommentLike,
 
     // Special tags
     BeforeSpecialS, // S
@@ -177,6 +170,7 @@ const SEQUENCES = {
     STYLE: new Uint16Array([0x73, 0x74, 0x79, 0x6c, 0x65]), // `style`
     TITLE: new Uint16Array([0x74, 0x69, 0x74, 0x6c, 0x65]), // `title`
 
+    COMMENT_END: new Uint16Array([0x2d, 0x2d, 0x3e]), // `-->`
     CDATA_END: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
 };
 
@@ -340,7 +334,9 @@ export default class Tokenizer {
     private stateCDATASequence(c: number) {
         if (c === SEQUENCES.CDATA[this.sequenceIndex]) {
             if (++this.sequenceIndex === SEQUENCES.CDATA.length) {
-                this._state = State.InCdata;
+                this._state = State.InCommentLike;
+                this.currentSequence = SEQUENCES.CDATA_END;
+                this.sequenceIndex = 0;
                 this.sectionStart = this._index + 1;
             }
         } else {
@@ -349,6 +345,60 @@ export default class Tokenizer {
         }
     }
 
+    /**
+     * When we wait for one specific character, we can speed things up
+     * by skipping through the buffer until we find it.
+     *
+     * @returns Whether the character was found.
+     */
+    private fastForwardTo(_c: number): boolean {
+        // TODO: Refactor `parse` to increment index before calling states.
+        while (this._index < this.buffer.length - 1) {
+            if (this.buffer.charCodeAt(this._index) === _c) {
+                return true;
+            }
+            this._index++;
+        }
+        return false;
+    }
+
+    /**
+     * Comments and CDATA end with `-->` and `]]>`.
+     *
+     * Their common qualities are:
+     * - Their end sequences have a distinct character they start with.
+     * - That character is then repeated, so we have to check multiple repeats.
+     * - All characters but the start character of the sequence can be skipped.
+     */
+    private stateInCommentLike(c: number) {
+        if (c === this.currentSequence[this.sequenceIndex]) {
+            if (++this.sequenceIndex === this.currentSequence.length) {
+                // Remove 2 trailing chars
+                const section = this.buffer.slice(
+                    this.sectionStart,
+                    this._index - 2
+                );
+
+                if (this.currentSequence === SEQUENCES.CDATA_END) {
+                    this.cbs.oncdata(section);
+                } else {
+                    this.cbs.oncomment(section);
+                }
+
+                this.sectionStart = this._index + 1;
+                this._state = State.Text;
+            }
+        } else if (this.sequenceIndex === 0) {
+            // Fast-forward to the first character of the sequence
+            if (this.fastForwardTo(this.currentSequence[0])) {
+                this.sequenceIndex = 1;
+            }
+        } else if (c !== this.currentSequence[this.sequenceIndex - 1]) {
+            // Allow long sequences, eg. --->, ]]]>
+            this.sequenceIndex = 0;
+        }
+    }
+
     /**
      * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
      *
@@ -576,15 +626,15 @@ export default class Tokenizer {
     }
     private stateBeforeComment(c: number) {
         if (c === CharCodes.Dash) {
-            this._state = State.InComment;
+            this._state = State.InCommentLike;
+            this.currentSequence = SEQUENCES.COMMENT_END;
+            // Allow short comments (eg. <!-->)
+            this.sequenceIndex = 2;
             this.sectionStart = this._index + 1;
         } else {
             this._state = State.InDeclaration;
         }
     }
-    private stateInComment(c: number) {
-        if (c === CharCodes.Dash) this._state = State.AfterComment1;
-    }
     private stateInSpecialComment(c: number) {
         if (c === CharCodes.Gt) {
             this.cbs.oncomment(
@@ -594,48 +644,6 @@ export default class Tokenizer {
             this.sectionStart = this._index + 1;
         }
     }
-    private stateAfterComment1(c: number) {
-        if (c === CharCodes.Dash) {
-            this._state = State.AfterComment2;
-        } else {
-            this._state = State.InComment;
-        }
-    }
-    private stateAfterComment2(c: number) {
-        if (c === CharCodes.Gt) {
-            // Remove 2 trailing chars
-            this.cbs.oncomment(
-                this.buffer.substring(this.sectionStart, this._index - 2)
-            );
-            this._state = State.Text;
-            this.sectionStart = this._index + 1;
-        } else if (c !== CharCodes.Dash) {
-            this._state = State.InComment;
-        }
-        // Else: stay in AFTER_COMMENT_2 (`--->`)
-    }
-    private stateInCdata(c: number) {
-        if (c === CharCodes.ClosingSquareBracket)
-            this._state = State.AfterCdata1;
-    }
-    private stateAfterCdata1(c: number) {
-        if (c === CharCodes.ClosingSquareBracket)
-            this._state = State.AfterCdata2;
-        else this._state = State.InCdata;
-    }
-    private stateAfterCdata2(c: number) {
-        if (c === CharCodes.Gt) {
-            // Remove 2 trailing chars
-            this.cbs.oncdata(
-                this.buffer.substring(this.sectionStart, this._index - 2)
-            );
-            this._state = State.Text;
-            this.sectionStart = this._index + 1;
-        } else if (c !== CharCodes.ClosingSquareBracket) {
-            this._state = State.InCdata;
-        }
-        // Else: stay in AFTER_CDATA_2 (`]]]>`)
-    }
     private stateBeforeSpecialS(c: number) {
         if (c === CharCodes.LowerC || c === CharCodes.UpperC) {
             this.special = Special.Script;
@@ -840,8 +848,8 @@ export default class Tokenizer {
                 this.stateInAttributeValueDoubleQuotes(c);
             } else if (this._state === State.InAttributeName) {
                 this.stateInAttributeName(c);
-            } else if (this._state === State.InComment) {
-                this.stateInComment(c);
+            } else if (this._state === State.InCommentLike) {
+                this.stateInCommentLike(c);
             } else if (this._state === State.InSpecialComment) {
                 this.stateInSpecialComment(c);
             } else if (this._state === State.BeforeAttributeName) {
@@ -864,8 +872,6 @@ export default class Tokenizer {
                 this.stateAfterClosingTagName(c);
             } else if (this._state === State.BeforeSpecialS) {
                 this.stateBeforeSpecialS(c);
-            } else if (this._state === State.AfterComment1) {
-                this.stateAfterComment1(c);
             } else if (this._state === State.InAttributeValueNq) {
                 this.stateInAttributeValueNoQuotes(c);
             } else if (this._state === State.InSelfClosingTag) {
@@ -874,8 +880,6 @@ export default class Tokenizer {
                 this.stateInDeclaration(c);
             } else if (this._state === State.BeforeDeclaration) {
                 this.stateBeforeDeclaration(c);
-            } else if (this._state === State.AfterComment2) {
-                this.stateAfterComment2(c);
             } else if (this._state === State.BeforeComment) {
                 this.stateBeforeComment(c);
             } else if (this._state === State.BeforeSpecialSEnd) {
@@ -892,8 +896,6 @@ export default class Tokenizer {
                 stateAfterScript4(this, c);
             } else if (this._state === State.AfterScript5) {
                 this.stateAfterSpecialLast(c, 6);
-            } else if (this._state === State.InCdata) {
-                this.stateInCdata(c);
             } else if (this._state === State.AfterStyle1) {
                 stateAfterStyle1(this, c);
             } else if (this._state === State.AfterStyle2) {
@@ -916,10 +918,6 @@ export default class Tokenizer {
                 this.stateInNamedEntity(c);
             } else if (this._state === State.BeforeEntity) {
                 this.stateBeforeEntity(c);
-            } else if (this._state === State.AfterCdata1) {
-                this.stateAfterCdata1(c);
-            } else if (this._state === State.AfterCdata2) {
-                this.stateAfterCdata2(c);
             } else if (this._state === State.InHexEntity) {
                 this.stateInHexEntity(c);
             } else if (this._state === State.InNumericEntity) {
@@ -944,18 +942,12 @@ export default class Tokenizer {
     /** Handle any trailing data. */
     private handleTrailingData() {
         const data = this.buffer.substr(this.sectionStart);
-        if (
-            this._state === State.InCdata ||
-            this._state === State.AfterCdata1 ||
-            this._state === State.AfterCdata2
-        ) {
-            this.cbs.oncdata(data);
-        } else if (
-            this._state === State.InComment ||
-            this._state === State.AfterComment1 ||
-            this._state === State.AfterComment2
-        ) {
-            this.cbs.oncomment(data);
+        if (this._state === State.InCommentLike) {
+            if (this.currentSequence === SEQUENCES.CDATA_END) {
+                this.cbs.oncdata(data);
+            } else {
+                this.cbs.oncomment(data);
+            }
         } else if (this._state === State.InNamedEntity && !this.xmlMode) {
             // Increase excess for EOF
             this.trieExcess++;

From 198b21c5c011a13bd92776a754af143e4211e3c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 10 Nov 2021 22:34:01 +0000
Subject: [PATCH 04/12] Fast forward in more places

---
 src/Tokenizer.ts | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 3b362f2b..29df46e0 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -320,7 +320,7 @@ export default class Tokenizer {
         const isMatch =
             this.sequenceIndex === this.currentSequence.length
                 ? // If we are at the end of the sequence, make sure the tag name has ended
-                  c === CharCodes.Slash || c === CharCodes.Gt || whitespace(c)
+                  endOfTagSectionChars.has(c)
                 : // Otherwise, do a case-insensitive comparison
                   (c | 0x20) === this.currentSequence[this.sequenceIndex];
 
@@ -357,14 +357,21 @@ export default class Tokenizer {
      *
      * @returns Whether the character was found.
      */
-    private fastForwardTo(_c: number): boolean {
-        // TODO: Refactor `parse` to increment index before calling states.
-        while (this._index < this.buffer.length - 1) {
-            if (this.buffer.charCodeAt(this._index) === _c) {
+    private fastForwardTo(c: number): boolean {
+        while (++this._index < this.buffer.length) {
+            if (this.buffer.charCodeAt(this._index) === c) {
                 return true;
             }
-            this._index++;
         }
+
+        /*
+         * We increment the index at the end of the `parse` loop,
+         * so set it to `buffer.length - 1` here.
+         *
+         * TODO: Refactor `parse` to increment index before calling states.
+         */
+        this._index = this.buffer.length - 1;
+
         return false;
     }
 
@@ -501,7 +508,7 @@ export default class Tokenizer {
     }
     private stateAfterClosingTagName(c: number) {
         // Skip everything until ">"
-        if (c === CharCodes.Gt) {
+        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
             this._state = State.Text;
             this.sectionStart = this._index + 1;
         }
@@ -608,14 +615,14 @@ export default class Tokenizer {
         }
     }
     private stateInDeclaration(c: number) {
-        if (c === CharCodes.Gt) {
+        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
             this.cbs.ondeclaration(this.getSection());
             this._state = State.Text;
             this.sectionStart = this._index + 1;
         }
     }
     private stateInProcessingInstruction(c: number) {
-        if (c === CharCodes.Gt) {
+        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
             this.cbs.onprocessinginstruction(this.getSection());
             this._state = State.Text;
             this.sectionStart = this._index + 1;
@@ -633,7 +640,7 @@ export default class Tokenizer {
         }
     }
     private stateInSpecialComment(c: number) {
-        if (c === CharCodes.Gt) {
+        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
             this.cbs.oncomment(this.getSection());
             this._state = State.Text;
             this.sectionStart = this._index + 1;

From fccfb031dbd1ce5d6ce0e80871b73161a9bba1f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 10 Nov 2021 23:25:04 +0000
Subject: [PATCH 05/12] And some more places, when entities don't matter

---
 src/Tokenizer.ts | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 29df46e0..97ec283f 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -294,7 +294,10 @@ export default class Tokenizer {
     }
 
     private stateText(c: number) {
-        if (c === CharCodes.Lt) {
+        if (
+            c === CharCodes.Lt ||
+            (!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))
+        ) {
             if (this._index > this.sectionStart) {
                 this.cbs.ontext(this.getSection());
             }
@@ -571,7 +574,10 @@ export default class Tokenizer {
         }
     }
     private handleInAttributeValue(c: number, quote: number) {
-        if (c === quote) {
+        if (
+            c === quote ||
+            (!this.decodeEntities && this.fastForwardTo(quote))
+        ) {
             this.cbs.onattribdata(this.getSection());
             this.sectionStart = -1;
             this.cbs.onattribend(String.fromCharCode(quote));

From 33ae3f3922e542192e75c7aef1bc19d80658de98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 00:00:49 +0000
Subject: [PATCH 06/12] Introduce `InSpecialTag`

---
 src/Tokenizer.ts | 227 ++++++++++++++++++++---------------------------
 1 file changed, 96 insertions(+), 131 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 97ec283f..a0421944 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -26,11 +26,8 @@ const enum CharCodes {
     Eq = 0x3d, // "="
     Gt = 0x3e, // ">"
     Questionmark = 0x3f, // "?"
-    UpperC = 0x43, // "C"
     LowerC = 0x63, // "c"
-    UpperS = 0x53, // "S"
     LowerS = 0x73, // "s"
-    UpperT = 0x54, // "T"
     LowerT = 0x74, // "t"
     UpperA = 0x41, // "A"
     LowerA = 0x61, // "a"
@@ -75,24 +72,7 @@ const enum State {
 
     // Special tags
     BeforeSpecialS, // S
-    BeforeSpecialSEnd, // S
-
-    AfterScript1, // C
-    AfterScript2, // R
-    AfterScript3, // I
-    AfterScript4, // P
-    AfterScript5, // T
-
-    AfterStyle1, // T
-    AfterStyle2, // Y
-    AfterStyle3, // L
-    AfterStyle4, // E
-
-    BeforeSpecialTEnd, // T
-    AfterTitle1, // I
-    AfterTitle2, // T
-    AfterTitle3, // L
-    AfterTitle4, // E
+    InSpecialTag,
 
     BeforeEntity, // &
     BeforeNumericEntity, // #
@@ -103,7 +83,6 @@ const enum State {
     // Sequences
     CDATASequence,
     SpecialStartSequence,
-    SpecialEndSequence,
 }
 
 const enum Special {
@@ -174,26 +153,14 @@ const SEQUENCES = {
         0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
     ]), // `</script`
     STYLE: new Uint16Array([0x73, 0x74, 0x79, 0x6c, 0x65]), // `style`
+    STYLE_END: new Uint16Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style`
     TITLE: new Uint16Array([0x74, 0x69, 0x74, 0x6c, 0x65]), // `title`
+    TITLE_END: new Uint16Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
 
     COMMENT_END: new Uint16Array([0x2d, 0x2d, 0x3e]), // `-->`
     CDATA_END: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
 };
 
-const stateAfterScript1 = ifElseState("R", State.AfterScript2, State.Text);
-const stateAfterScript2 = ifElseState("I", State.AfterScript3, State.Text);
-const stateAfterScript3 = ifElseState("P", State.AfterScript4, State.Text);
-const stateAfterScript4 = ifElseState("T", State.AfterScript5, State.Text);
-
-const stateAfterStyle1 = ifElseState("Y", State.AfterStyle2, State.Text);
-const stateAfterStyle2 = ifElseState("L", State.AfterStyle3, State.Text);
-const stateAfterStyle3 = ifElseState("E", State.AfterStyle4, State.Text);
-
-const stateBeforeSpecialTEnd = ifElseState("I", State.AfterTitle1, State.Text);
-const stateAfterTitle1 = ifElseState("T", State.AfterTitle2, State.Text);
-const stateAfterTitle2 = ifElseState("L", State.AfterTitle3, State.Text);
-const stateAfterTitle3 = ifElseState("E", State.AfterTitle4, State.Text);
-
 const stateBeforeNumericEntity = ifElseState(
     "X",
     State.InHexEntity,
@@ -303,11 +270,7 @@ export default class Tokenizer {
             }
             this._state = State.BeforeTagName;
             this.sectionStart = this._index;
-        } else if (
-            this.decodeEntities &&
-            c === CharCodes.Amp &&
-            (this.special === Special.None || this.special === Special.Title)
-        ) {
+        } else if (this.decodeEntities && c === CharCodes.Amp) {
             if (this._index > this.sectionStart) {
                 this.cbs.ontext(this.getSection());
             }
@@ -320,25 +283,69 @@ export default class Tokenizer {
     private currentSequence!: Uint16Array;
     private sequenceIndex = 0;
     private stateSpecialStartSequence(c: number) {
-        const isMatch =
-            this.sequenceIndex === this.currentSequence.length
-                ? // If we are at the end of the sequence, make sure the tag name has ended
-                  endOfTagSectionChars.has(c)
-                : // Otherwise, do a case-insensitive comparison
-                  (c | 0x20) === this.currentSequence[this.sequenceIndex];
+        const isEnd = this.sequenceIndex === this.currentSequence.length;
+        const isMatch = isEnd
+            ? // If we are at the end of the sequence, make sure the tag name has ended
+              endOfTagSectionChars.has(c)
+            : // Otherwise, do a case-insensitive comparison
+              (c | 0x20) === this.currentSequence[this.sequenceIndex];
 
         if (!isMatch) {
             this.special = Special.None;
-        } else if (this.sequenceIndex < this.currentSequence.length) {
+        } else if (!isEnd) {
             this.sequenceIndex++;
             return;
         }
 
         this._state = State.InTagName;
-        this.stateInTagName(c); // Reconsume the character
+        this.stateInTagName(c);
     }
 
-    private stateSpecialEndSequence(_c: number) {}
+    /** Look for an end tag. For <title> tags, also decode entities. */
+    private stateInSpecialTag(c: number) {
+        if (this.sequenceIndex === this.currentSequence.length) {
+            if (c === CharCodes.Gt || whitespaceChars.has(c)) {
+                const endOfText = this._index - this.currentSequence.length;
+
+                if (this.sectionStart < endOfText) {
+                    // Spoof the index so that reported locations match up.
+                    const actualIndex = this._index;
+                    this._index = endOfText;
+                    this.cbs.ontext(this.getSection());
+                    this._index = actualIndex;
+                }
+
+                this.special = Special.None;
+                this.sectionStart = endOfText + 2; // Skip over the `</`
+                this.stateInClosingTagName(c);
+                return; // We are done; skip the rest of the function.
+            }
+
+            this.sequenceIndex = 0;
+        }
+
+        if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
+            this.sequenceIndex += 1;
+        } else if (this.sequenceIndex === 0) {
+            if (this.special === Special.Title) {
+                // We have to parse entities in <title> tags.
+                if (this.decodeEntities && c === CharCodes.Amp) {
+                    if (this._index > this.sectionStart) {
+                        this.cbs.ontext(this.getSection());
+                    }
+                    this.baseState = State.InSpecialTag;
+                    this._state = State.BeforeEntity;
+                    this.sectionStart = this._index;
+                }
+            } else if (this.fastForwardTo(CharCodes.Lt)) {
+                // Outside of <title> tags, we can fast-forward.
+                this.sequenceIndex = 1;
+            }
+        } else {
+            // If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`.
+            this.sequenceIndex = Number(c === CharCodes.Lt);
+        }
+    }
 
     private stateCDATASequence(c: number) {
         if (c === SEQUENCES.CDATA[this.sequenceIndex]) {
@@ -447,19 +454,16 @@ export default class Tokenizer {
         } else if (!this.isTagStartChar(c)) {
             this._state = State.Text;
         } else {
+            const lower = c | 0x20;
             this.sectionStart = this._index;
-            if (
-                !this.xmlMode &&
-                (c === CharCodes.LowerT || c === CharCodes.UpperT)
-            ) {
+            if (!this.xmlMode && lower === CharCodes.LowerT) {
                 this.special = Special.Title;
                 this.currentSequence = SEQUENCES.TITLE;
                 this.sequenceIndex = 1;
                 this._state = State.SpecialStartSequence;
             } else {
                 this._state =
-                    !this.xmlMode &&
-                    (c === CharCodes.LowerS || c === CharCodes.UpperS)
+                    !this.xmlMode && lower === CharCodes.LowerS
                         ? State.BeforeSpecialS
                         : State.InTagName;
             }
@@ -478,21 +482,6 @@ export default class Tokenizer {
             // Ignore
         } else if (c === CharCodes.Gt) {
             this._state = State.Text;
-        } else if (this.special !== Special.None) {
-            if (
-                this.special !== Special.Title &&
-                (c === CharCodes.LowerS || c === CharCodes.UpperS)
-            ) {
-                this._state = State.BeforeSpecialSEnd;
-            } else if (
-                this.special === Special.Title &&
-                (c === CharCodes.LowerT || c === CharCodes.UpperT)
-            ) {
-                this._state = State.BeforeSpecialTEnd;
-            } else {
-                this._state = State.Text;
-                this.stateText(c);
-            }
         } else if (!this.isTagStartChar(c)) {
             this._state = State.InSpecialComment;
             this.sectionStart = this._index;
@@ -519,7 +508,24 @@ export default class Tokenizer {
     private stateBeforeAttributeName(c: number) {
         if (c === CharCodes.Gt) {
             this.cbs.onopentagend();
-            this._state = State.Text;
+            if (this.special !== Special.None) {
+                this._state = State.InSpecialTag;
+                this.sequenceIndex = 0;
+
+                switch (this.special) {
+                    case Special.Script:
+                        this.currentSequence = SEQUENCES.SCRIPT_END;
+                        break;
+                    case Special.Style:
+                        this.currentSequence = SEQUENCES.STYLE_END;
+                        break;
+                    case Special.Title:
+                        this.currentSequence = SEQUENCES.TITLE_END;
+                        break;
+                }
+            } else {
+                this._state = State.Text;
+            }
             this.sectionStart = this._index + 1;
         } else if (c === CharCodes.Slash) {
             this._state = State.InSelfClosingTag;
@@ -653,12 +659,13 @@ export default class Tokenizer {
         }
     }
     private stateBeforeSpecialS(c: number) {
-        if (c === CharCodes.LowerC || c === CharCodes.UpperC) {
+        const lower = c | 0x20;
+        if (lower === CharCodes.LowerC) {
             this.special = Special.Script;
             this.currentSequence = SEQUENCES.SCRIPT;
             this.sequenceIndex = 2;
             this._state = State.SpecialStartSequence;
-        } else if (c === CharCodes.LowerT || c === CharCodes.UpperT) {
+        } else if (lower === CharCodes.LowerT) {
             this.special = Special.Style;
             this.currentSequence = SEQUENCES.STYLE;
             this.sequenceIndex = 2;
@@ -668,27 +675,6 @@ export default class Tokenizer {
             this.stateInTagName(c); // Consume the token again
         }
     }
-    private stateBeforeSpecialSEnd(c: number) {
-        if (
-            this.special === Special.Script &&
-            (c === CharCodes.LowerC || c === CharCodes.UpperC)
-        ) {
-            this._state = State.AfterScript1;
-        } else if (
-            this.special === Special.Style &&
-            (c === CharCodes.LowerT || c === CharCodes.UpperT)
-        ) {
-            this._state = State.AfterStyle1;
-        } else this._state = State.Text;
-    }
-    private stateAfterSpecialLast(c: number, sectionStartOffset: number) {
-        if (c === CharCodes.Gt || whitespaceChars.has(c)) {
-            this.sectionStart = this._index - sectionStartOffset;
-            this.special = Special.None;
-            this._state = State.InClosingTagName;
-            this.stateInClosingTagName(c); // Reconsume the token
-        } else this._state = State.Text;
-    }
 
     private trieIndex = 0;
     private trieCurrent = 0;
@@ -803,7 +789,11 @@ export default class Tokenizer {
     }
 
     private allowLegacyEntity() {
-        return !this.xmlMode && this.baseState === State.Text;
+        return (
+            !this.xmlMode &&
+            (this.baseState === State.Text ||
+                this.baseState === State.InSpecialTag)
+        );
     }
 
     /**
@@ -813,8 +803,10 @@ export default class Tokenizer {
         // If we are inside of text, emit what we already have.
         if (
             this.running &&
-            this._state === State.Text &&
-            this.sectionStart !== this._index
+            this.sectionStart !== this._index &&
+            (this._state === State.Text ||
+                (this._state === State.InSpecialTag &&
+                    this.sequenceIndex === 0))
         ) {
             // TODO: We could emit attribute data here as well.
             this.cbs.ontext(this.buffer.substr(this.sectionStart));
@@ -848,8 +840,8 @@ export default class Tokenizer {
                 this.stateText(c);
             } else if (this._state === State.SpecialStartSequence) {
                 this.stateSpecialStartSequence(c);
-            } else if (this._state === State.SpecialEndSequence) {
-                this.stateSpecialEndSequence(c);
+            } else if (this._state === State.InSpecialTag) {
+                this.stateInSpecialTag(c);
             } else if (this._state === State.CDATASequence) {
                 this.stateCDATASequence(c);
             } else if (this._state === State.InAttributeValueDq) {
@@ -890,36 +882,6 @@ export default class Tokenizer {
                 this.stateBeforeDeclaration(c);
             } else if (this._state === State.BeforeComment) {
                 this.stateBeforeComment(c);
-            } else if (this._state === State.BeforeSpecialSEnd) {
-                this.stateBeforeSpecialSEnd(c);
-            } else if (this._state === State.BeforeSpecialTEnd) {
-                stateBeforeSpecialTEnd(this, c);
-            } else if (this._state === State.AfterScript1) {
-                stateAfterScript1(this, c);
-            } else if (this._state === State.AfterScript2) {
-                stateAfterScript2(this, c);
-            } else if (this._state === State.AfterScript3) {
-                stateAfterScript3(this, c);
-            } else if (this._state === State.AfterScript4) {
-                stateAfterScript4(this, c);
-            } else if (this._state === State.AfterScript5) {
-                this.stateAfterSpecialLast(c, 6);
-            } else if (this._state === State.AfterStyle1) {
-                stateAfterStyle1(this, c);
-            } else if (this._state === State.AfterStyle2) {
-                stateAfterStyle2(this, c);
-            } else if (this._state === State.AfterStyle3) {
-                stateAfterStyle3(this, c);
-            } else if (this._state === State.AfterStyle4) {
-                this.stateAfterSpecialLast(c, 5);
-            } else if (this._state === State.AfterTitle1) {
-                stateAfterTitle1(this, c);
-            } else if (this._state === State.AfterTitle2) {
-                stateAfterTitle2(this, c);
-            } else if (this._state === State.AfterTitle3) {
-                stateAfterTitle3(this, c);
-            } else if (this._state === State.AfterTitle4) {
-                this.stateAfterSpecialLast(c, 5);
             } else if (this._state === State.InProcessingInstruction) {
                 this.stateInProcessingInstruction(c);
             } else if (this._state === State.InNamedEntity) {
@@ -994,7 +956,10 @@ export default class Tokenizer {
         return this.buffer.substring(this.sectionStart, this._index);
     }
     private emitPartial(value: string) {
-        if (this.baseState !== State.Text) {
+        if (
+            this.baseState !== State.Text &&
+            this.baseState !== State.InSpecialTag
+        ) {
             this.cbs.onattribdata(value);
         } else {
             this.cbs.ontext(value);

From 8fcd6164268d4fd2ebbfa69ba6f4e4eabe9d165f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 00:22:55 +0000
Subject: [PATCH 07/12] Remove remaining `ifElseState`

---
 src/Tokenizer.ts | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index a0421944..229ae294 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -35,6 +35,7 @@ const enum CharCodes {
     LowerF = 0x66, // "f"
     UpperZ = 0x5a, // "Z"
     LowerZ = 0x7a, // "z"
+    LowerX = 0x78, // "x"
     OpeningSquareBracket = 0x5b, // "["
     ClosingSquareBracket = 0x5d, // "]"
 }
@@ -132,20 +133,6 @@ export interface Callbacks {
     ontext(value: string): void;
 }
 
-function ifElseState(upper: string, SUCCESS: State, FAILURE: State) {
-    const upperCode = upper.charCodeAt(0);
-    const lowerCode = upper.toLowerCase().charCodeAt(0);
-
-    return (t: Tokenizer, c: number) => {
-        if (c === lowerCode || c === upperCode) {
-            t._state = SUCCESS;
-        } else {
-            t._state = FAILURE;
-            t._index--;
-        }
-    };
-}
-
 const SEQUENCES = {
     CDATA: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
     SCRIPT: new Uint16Array([0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `script`
@@ -161,12 +148,6 @@ const SEQUENCES = {
     CDATA_END: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
 };
 
-const stateBeforeNumericEntity = ifElseState(
-    "X",
-    State.InHexEntity,
-    State.InNumericEntity
-);
-
 export default class Tokenizer {
     /** The current state the tokenizer is in. */
     _state = State.Text;
@@ -297,6 +278,7 @@ export default class Tokenizer {
             return;
         }
 
+        this.sequenceIndex = 0;
         this._state = State.InTagName;
         this.stateInTagName(c);
     }
@@ -356,6 +338,7 @@ export default class Tokenizer {
                 this.sectionStart = this._index + 1;
             }
         } else {
+            this.sequenceIndex = 0;
             this._state = State.InDeclaration;
             this.stateInDeclaration(c); // Reconsume the character
         }
@@ -408,6 +391,7 @@ export default class Tokenizer {
                     this.cbs.oncomment(section);
                 }
 
+                this.sequenceIndex = 0;
                 this.sectionStart = this._index + 1;
                 this._state = State.Text;
             }
@@ -525,6 +509,7 @@ export default class Tokenizer {
                 }
             } else {
                 this._state = State.Text;
+                this.baseState = State.Text;
             }
             this.sectionStart = this._index + 1;
         } else if (c === CharCodes.Slash) {
@@ -538,6 +523,7 @@ export default class Tokenizer {
         if (c === CharCodes.Gt) {
             this.cbs.onselfclosingtag();
             this._state = State.Text;
+            this.baseState = State.Text;
             this.sectionStart = this._index + 1;
             this.special = Special.None; // Reset special state, in case of self-closing special tags
         } else if (!whitespaceChars.has(c)) {
@@ -748,6 +734,15 @@ export default class Tokenizer {
         this._state = this.baseState;
     }
 
+    private stateBeforeNumericEntity(c: number) {
+        if ((c | 0x20) === CharCodes.LowerX) {
+            this._state = State.InHexEntity;
+        } else {
+            this._state = State.InNumericEntity;
+            this.stateInNumericEntity(c);
+        }
+    }
+
     private decodeNumericEntity(base: 10 | 16, strict: boolean) {
         const sectionStart = this.sectionStart + 2 + (base >> 4);
         if (sectionStart !== this._index) {
@@ -809,7 +804,7 @@ export default class Tokenizer {
                     this.sequenceIndex === 0))
         ) {
             // TODO: We could emit attribute data here as well.
-            this.cbs.ontext(this.buffer.substr(this.sectionStart));
+            this.emitPartial(this.buffer.substr(this.sectionStart));
             this.sectionStart = this._index;
         }
 
@@ -894,7 +889,7 @@ export default class Tokenizer {
                 this.stateInNumericEntity(c);
             } else {
                 // `this._state === State.BeforeNumericEntity`
-                stateBeforeNumericEntity(this, c);
+                this.stateBeforeNumericEntity(c);
             }
             this._index++;
         }

From a13f3dcfd6d3c6cbbef6994316ebdf67154fb6e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 00:36:03 +0000
Subject: [PATCH 08/12] Drop `Special` enum

In favour of a boolean
---
 src/Tokenizer.ts | 49 +++++++++++++++++-------------------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 229ae294..b48b1367 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -86,13 +86,6 @@ const enum State {
     SpecialStartSequence,
 }
 
-const enum Special {
-    None = 1,
-    Script,
-    Style,
-    Title,
-}
-
 // Maintained as an array to keep TS at ES5
 const whitespaceCharArray = [
     CharCodes.Space,
@@ -165,7 +158,7 @@ export default class Tokenizer {
     /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
     private baseState = State.Text;
     /** For special parsing behavior inside of script and style tags. */
-    private special = Special.None;
+    private isSpecial = false;
     /** Indicates whether the tokenizer has been paused. */
     private running = true;
     /** Indicates whether the tokenizer has finished running / `.end` has been called. */
@@ -194,7 +187,7 @@ export default class Tokenizer {
         this._index = 0;
         this.bufferOffset = 0;
         this.baseState = State.Text;
-        this.special = Special.None;
+        this.currentSequence = undefined!;
         this.running = true;
         this.ended = false;
     }
@@ -272,7 +265,7 @@ export default class Tokenizer {
               (c | 0x20) === this.currentSequence[this.sequenceIndex];
 
         if (!isMatch) {
-            this.special = Special.None;
+            this.isSpecial = false;
         } else if (!isEnd) {
             this.sequenceIndex++;
             return;
@@ -297,7 +290,7 @@ export default class Tokenizer {
                     this._index = actualIndex;
                 }
 
-                this.special = Special.None;
+                this.isSpecial = false;
                 this.sectionStart = endOfText + 2; // Skip over the `</`
                 this.stateInClosingTagName(c);
                 return; // We are done; skip the rest of the function.
@@ -309,7 +302,7 @@ export default class Tokenizer {
         if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
             this.sequenceIndex += 1;
         } else if (this.sequenceIndex === 0) {
-            if (this.special === Special.Title) {
+            if (this.currentSequence === SEQUENCES.TITLE_END) {
                 // We have to parse entities in <title> tags.
                 if (this.decodeEntities && c === CharCodes.Amp) {
                     if (this._index > this.sectionStart) {
@@ -423,11 +416,7 @@ export default class Tokenizer {
         } else if (c === CharCodes.Lt) {
             this.cbs.ontext(this.getSection());
             this.sectionStart = this._index;
-        } else if (
-            c === CharCodes.Gt ||
-            this.special !== Special.None ||
-            whitespaceChars.has(c)
-        ) {
+        } else if (c === CharCodes.Gt || whitespaceChars.has(c)) {
             this._state = State.Text;
         } else if (c === CharCodes.ExclamationMark) {
             this._state = State.BeforeDeclaration;
@@ -441,7 +430,7 @@ export default class Tokenizer {
             const lower = c | 0x20;
             this.sectionStart = this._index;
             if (!this.xmlMode && lower === CharCodes.LowerT) {
-                this.special = Special.Title;
+                this.isSpecial = true;
                 this.currentSequence = SEQUENCES.TITLE;
                 this.sequenceIndex = 1;
                 this._state = State.SpecialStartSequence;
@@ -492,20 +481,16 @@ export default class Tokenizer {
     private stateBeforeAttributeName(c: number) {
         if (c === CharCodes.Gt) {
             this.cbs.onopentagend();
-            if (this.special !== Special.None) {
+            if (this.isSpecial) {
                 this._state = State.InSpecialTag;
                 this.sequenceIndex = 0;
 
-                switch (this.special) {
-                    case Special.Script:
-                        this.currentSequence = SEQUENCES.SCRIPT_END;
-                        break;
-                    case Special.Style:
-                        this.currentSequence = SEQUENCES.STYLE_END;
-                        break;
-                    case Special.Title:
-                        this.currentSequence = SEQUENCES.TITLE_END;
-                        break;
+                if (this.currentSequence === SEQUENCES.SCRIPT) {
+                    this.currentSequence = SEQUENCES.SCRIPT_END;
+                } else if (this.currentSequence === SEQUENCES.STYLE) {
+                    this.currentSequence = SEQUENCES.STYLE_END;
+                } else if (this.currentSequence === SEQUENCES.TITLE) {
+                    this.currentSequence = SEQUENCES.TITLE_END;
                 }
             } else {
                 this._state = State.Text;
@@ -525,7 +510,7 @@ export default class Tokenizer {
             this._state = State.Text;
             this.baseState = State.Text;
             this.sectionStart = this._index + 1;
-            this.special = Special.None; // Reset special state, in case of self-closing special tags
+            this.isSpecial = false; // Reset special state, in case of self-closing special tags
         } else if (!whitespaceChars.has(c)) {
             this._state = State.BeforeAttributeName;
             this.stateBeforeAttributeName(c);
@@ -647,12 +632,12 @@ export default class Tokenizer {
     private stateBeforeSpecialS(c: number) {
         const lower = c | 0x20;
         if (lower === CharCodes.LowerC) {
-            this.special = Special.Script;
+            this.isSpecial = true;
             this.currentSequence = SEQUENCES.SCRIPT;
             this.sequenceIndex = 2;
             this._state = State.SpecialStartSequence;
         } else if (lower === CharCodes.LowerT) {
-            this.special = Special.Style;
+            this.isSpecial = true;
             this.currentSequence = SEQUENCES.STYLE;
             this.sequenceIndex = 2;
             this._state = State.SpecialStartSequence;

From 3a47bfbe04997be0df2dae1334ab50af01691753 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 00:36:34 +0000
Subject: [PATCH 09/12] revert a change

---
 src/Tokenizer.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index b48b1367..bb0ab0b0 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -789,7 +789,7 @@ export default class Tokenizer {
                     this.sequenceIndex === 0))
         ) {
             // TODO: We could emit attribute data here as well.
-            this.emitPartial(this.buffer.substr(this.sectionStart));
+            this.cbs.ontext(this.buffer.substr(this.sectionStart));
             this.sectionStart = this._index;
         }
 

From b3067fa7da8d73734acd680d868f94b57131131e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 00:54:40 +0000
Subject: [PATCH 10/12] Restore checking fns

---
 src/Tokenizer.ts | 64 ++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index bb0ab0b0..16870d08 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -66,13 +66,15 @@ const enum State {
     // Processing instructions
     InProcessingInstruction, // ?
 
-    // Comments
+    // Comments & CDATA
     BeforeComment,
+    CDATASequence,
     InSpecialComment,
     InCommentLike,
 
     // Special tags
-    BeforeSpecialS, // S
+    BeforeSpecialS, // Decide if we deal with `<script` or `<style`
+    SpecialStartSequence,
     InSpecialTag,
 
     BeforeEntity, // &
@@ -80,27 +82,21 @@ const enum State {
     InNamedEntity,
     InNumericEntity,
     InHexEntity, // X
+}
 
-    // Sequences
-    CDATASequence,
-    SpecialStartSequence,
+function isWhitespace(c: number): boolean {
+    return (
+        c === CharCodes.Space ||
+        c === CharCodes.NewLine ||
+        c === CharCodes.Tab ||
+        c === CharCodes.FormFeed ||
+        c === CharCodes.CarriageReturn
+    );
 }
 
-// Maintained as an array to keep TS at ES5
-const whitespaceCharArray = [
-    CharCodes.Space,
-    CharCodes.NewLine,
-    CharCodes.Tab,
-    CharCodes.FormFeed,
-    CharCodes.CarriageReturn,
-];
-
-const whitespaceChars = new Set(whitespaceCharArray);
-const endOfTagSectionChars = new Set([
-    ...whitespaceCharArray,
-    CharCodes.Slash,
-    CharCodes.Gt,
-]);
+function isEndOfTagSection(c: number): boolean {
+    return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c);
+}
 
 function isASCIIAlpha(c: number): boolean {
     return (
@@ -260,7 +256,7 @@ export default class Tokenizer {
         const isEnd = this.sequenceIndex === this.currentSequence.length;
         const isMatch = isEnd
             ? // If we are at the end of the sequence, make sure the tag name has ended
-              endOfTagSectionChars.has(c)
+              isEndOfTagSection(c)
             : // Otherwise, do a case-insensitive comparison
               (c | 0x20) === this.currentSequence[this.sequenceIndex];
 
@@ -279,7 +275,7 @@ export default class Tokenizer {
     /** Look for an end tag. For <title> tags, also decode entities. */
     private stateInSpecialTag(c: number) {
         if (this.sequenceIndex === this.currentSequence.length) {
-            if (c === CharCodes.Gt || whitespaceChars.has(c)) {
+            if (c === CharCodes.Gt || isWhitespace(c)) {
                 const endOfText = this._index - this.currentSequence.length;
 
                 if (this.sectionStart < endOfText) {
@@ -406,9 +402,7 @@ export default class Tokenizer {
      * We allow anything that wouldn't end the tag.
      */
     private isTagStartChar(c: number) {
-        return (
-            isASCIIAlpha(c) || (this.xmlMode && !endOfTagSectionChars.has(c))
-        );
+        return isASCIIAlpha(c) || (this.xmlMode && !isEndOfTagSection(c));
     }
     private stateBeforeTagName(c: number) {
         if (c === CharCodes.Slash) {
@@ -416,7 +410,7 @@ export default class Tokenizer {
         } else if (c === CharCodes.Lt) {
             this.cbs.ontext(this.getSection());
             this.sectionStart = this._index;
-        } else if (c === CharCodes.Gt || whitespaceChars.has(c)) {
+        } else if (c === CharCodes.Gt || isWhitespace(c)) {
             this._state = State.Text;
         } else if (c === CharCodes.ExclamationMark) {
             this._state = State.BeforeDeclaration;
@@ -443,7 +437,7 @@ export default class Tokenizer {
         }
     }
     private stateInTagName(c: number) {
-        if (endOfTagSectionChars.has(c)) {
+        if (isEndOfTagSection(c)) {
             this.cbs.onopentagname(this.getSection());
             this.sectionStart = -1;
             this._state = State.BeforeAttributeName;
@@ -451,7 +445,7 @@ export default class Tokenizer {
         }
     }
     private stateBeforeClosingTagName(c: number) {
-        if (whitespaceChars.has(c)) {
+        if (isWhitespace(c)) {
             // Ignore
         } else if (c === CharCodes.Gt) {
             this._state = State.Text;
@@ -464,7 +458,7 @@ export default class Tokenizer {
         }
     }
     private stateInClosingTagName(c: number) {
-        if (c === CharCodes.Gt || whitespaceChars.has(c)) {
+        if (c === CharCodes.Gt || isWhitespace(c)) {
             this.cbs.onclosetag(this.getSection());
             this.sectionStart = -1;
             this._state = State.AfterClosingTagName;
@@ -499,7 +493,7 @@ export default class Tokenizer {
             this.sectionStart = this._index + 1;
         } else if (c === CharCodes.Slash) {
             this._state = State.InSelfClosingTag;
-        } else if (!whitespaceChars.has(c)) {
+        } else if (!isWhitespace(c)) {
             this._state = State.InAttributeName;
             this.sectionStart = this._index;
         }
@@ -511,13 +505,13 @@ export default class Tokenizer {
             this.baseState = State.Text;
             this.sectionStart = this._index + 1;
             this.isSpecial = false; // Reset special state, in case of self-closing special tags
-        } else if (!whitespaceChars.has(c)) {
+        } else if (!isWhitespace(c)) {
             this._state = State.BeforeAttributeName;
             this.stateBeforeAttributeName(c);
         }
     }
     private stateInAttributeName(c: number) {
-        if (c === CharCodes.Eq || endOfTagSectionChars.has(c)) {
+        if (c === CharCodes.Eq || isEndOfTagSection(c)) {
             this.cbs.onattribname(this.getSection());
             this.sectionStart = -1;
             this._state = State.AfterAttributeName;
@@ -531,7 +525,7 @@ export default class Tokenizer {
             this.cbs.onattribend(undefined);
             this._state = State.BeforeAttributeName;
             this.stateBeforeAttributeName(c);
-        } else if (!whitespaceChars.has(c)) {
+        } else if (!isWhitespace(c)) {
             this.cbs.onattribend(undefined);
             this._state = State.InAttributeName;
             this.sectionStart = this._index;
@@ -544,7 +538,7 @@ export default class Tokenizer {
         } else if (c === CharCodes.SingleQuote) {
             this._state = State.InAttributeValueSq;
             this.sectionStart = this._index + 1;
-        } else if (!whitespaceChars.has(c)) {
+        } else if (!isWhitespace(c)) {
             this.sectionStart = this._index;
             this._state = State.InAttributeValueNq;
             this.stateInAttributeValueNoQuotes(c); // Reconsume token
@@ -573,7 +567,7 @@ export default class Tokenizer {
         this.handleInAttributeValue(c, CharCodes.SingleQuote);
     }
     private stateInAttributeValueNoQuotes(c: number) {
-        if (whitespaceChars.has(c) || c === CharCodes.Gt) {
+        if (isWhitespace(c) || c === CharCodes.Gt) {
             this.cbs.onattribdata(this.getSection());
             this.sectionStart = -1;
             this.cbs.onattribend(null);

From ba12e37eb9dbf3fa490537c217bf803e96618ffb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 01:04:39 +0000
Subject: [PATCH 11/12] Re-use *End sequences, rename new things

---
 src/Tokenizer.ts | 64 ++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 16870d08..c966bb39 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -122,30 +122,32 @@ export interface Callbacks {
     ontext(value: string): void;
 }
 
-const SEQUENCES = {
-    CDATA: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
-    SCRIPT: new Uint16Array([0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), // `script`
-    SCRIPT_END: new Uint16Array([
+/**
+ * Sequences used to match longer strings.
+ *
+ * We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End
+ * sequences with an increased offset.
+ */
+const Sequences = {
+    Cdata: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
+    CdataEnd: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
+    CommentEnd: new Uint16Array([0x2d, 0x2d, 0x3e]), // `-->`
+    ScriptEnd: new Uint16Array([
         0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
     ]), // `</script`
-    STYLE: new Uint16Array([0x73, 0x74, 0x79, 0x6c, 0x65]), // `style`
-    STYLE_END: new Uint16Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style`
-    TITLE: new Uint16Array([0x74, 0x69, 0x74, 0x6c, 0x65]), // `title`
-    TITLE_END: new Uint16Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
-
-    COMMENT_END: new Uint16Array([0x2d, 0x2d, 0x3e]), // `-->`
-    CDATA_END: new Uint16Array([0x5d, 0x5d, 0x3e]), // ]]>
+    StyleEnd: new Uint16Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), // `</style`
+    TitleEnd: new Uint16Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title`
 };
 
 export default class Tokenizer {
     /** The current state the tokenizer is in. */
-    _state = State.Text;
+    private _state = State.Text;
     /** The read buffer. */
     private buffer = "";
     /** The beginning of the section that is currently being read. */
     public sectionStart = 0;
     /** The index within the buffer that we are currently looking at. */
-    _index = 0;
+    private _index = 0;
     /**
      * Data that has already been processed will be removed from the buffer occasionally.
      * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
@@ -298,7 +300,7 @@ export default class Tokenizer {
         if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
             this.sequenceIndex += 1;
         } else if (this.sequenceIndex === 0) {
-            if (this.currentSequence === SEQUENCES.TITLE_END) {
+            if (this.currentSequence === Sequences.TitleEnd) {
                 // We have to parse entities in <title> tags.
                 if (this.decodeEntities && c === CharCodes.Amp) {
                     if (this._index > this.sectionStart) {
@@ -319,10 +321,10 @@ export default class Tokenizer {
     }
 
     private stateCDATASequence(c: number) {
-        if (c === SEQUENCES.CDATA[this.sequenceIndex]) {
-            if (++this.sequenceIndex === SEQUENCES.CDATA.length) {
+        if (c === Sequences.Cdata[this.sequenceIndex]) {
+            if (++this.sequenceIndex === Sequences.Cdata.length) {
                 this._state = State.InCommentLike;
-                this.currentSequence = SEQUENCES.CDATA_END;
+                this.currentSequence = Sequences.CdataEnd;
                 this.sequenceIndex = 0;
                 this.sectionStart = this._index + 1;
             }
@@ -374,7 +376,7 @@ export default class Tokenizer {
                     this._index - 2
                 );
 
-                if (this.currentSequence === SEQUENCES.CDATA_END) {
+                if (this.currentSequence === Sequences.CdataEnd) {
                     this.cbs.oncdata(section);
                 } else {
                     this.cbs.oncomment(section);
@@ -425,8 +427,8 @@ export default class Tokenizer {
             this.sectionStart = this._index;
             if (!this.xmlMode && lower === CharCodes.LowerT) {
                 this.isSpecial = true;
-                this.currentSequence = SEQUENCES.TITLE;
-                this.sequenceIndex = 1;
+                this.currentSequence = Sequences.TitleEnd;
+                this.sequenceIndex = 3;
                 this._state = State.SpecialStartSequence;
             } else {
                 this._state =
@@ -478,18 +480,10 @@ export default class Tokenizer {
             if (this.isSpecial) {
                 this._state = State.InSpecialTag;
                 this.sequenceIndex = 0;
-
-                if (this.currentSequence === SEQUENCES.SCRIPT) {
-                    this.currentSequence = SEQUENCES.SCRIPT_END;
-                } else if (this.currentSequence === SEQUENCES.STYLE) {
-                    this.currentSequence = SEQUENCES.STYLE_END;
-                } else if (this.currentSequence === SEQUENCES.TITLE) {
-                    this.currentSequence = SEQUENCES.TITLE_END;
-                }
             } else {
                 this._state = State.Text;
-                this.baseState = State.Text;
             }
+            this.baseState = this._state;
             this.sectionStart = this._index + 1;
         } else if (c === CharCodes.Slash) {
             this._state = State.InSelfClosingTag;
@@ -608,7 +602,7 @@ export default class Tokenizer {
     private stateBeforeComment(c: number) {
         if (c === CharCodes.Dash) {
             this._state = State.InCommentLike;
-            this.currentSequence = SEQUENCES.COMMENT_END;
+            this.currentSequence = Sequences.CommentEnd;
             // Allow short comments (eg. <!-->)
             this.sequenceIndex = 2;
             this.sectionStart = this._index + 1;
@@ -627,13 +621,13 @@ export default class Tokenizer {
         const lower = c | 0x20;
         if (lower === CharCodes.LowerC) {
             this.isSpecial = true;
-            this.currentSequence = SEQUENCES.SCRIPT;
-            this.sequenceIndex = 2;
+            this.currentSequence = Sequences.ScriptEnd;
+            this.sequenceIndex = 4;
             this._state = State.SpecialStartSequence;
         } else if (lower === CharCodes.LowerT) {
             this.isSpecial = true;
-            this.currentSequence = SEQUENCES.STYLE;
-            this.sequenceIndex = 2;
+            this.currentSequence = Sequences.StyleEnd;
+            this.sequenceIndex = 4;
             this._state = State.SpecialStartSequence;
         } else {
             this._state = State.InTagName;
@@ -887,7 +881,7 @@ export default class Tokenizer {
     private handleTrailingData() {
         const data = this.buffer.substr(this.sectionStart);
         if (this._state === State.InCommentLike) {
-            if (this.currentSequence === SEQUENCES.CDATA_END) {
+            if (this.currentSequence === Sequences.CdataEnd) {
                 this.cbs.oncdata(data);
             } else {
                 this.cbs.oncomment(data);

From 27cf34dbb12b187bf8e7fed24ed0b6426d0d486f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Thu, 11 Nov 2021 01:06:01 +0000
Subject: [PATCH 12/12] Don't set `baseState` if already set

---
 src/Tokenizer.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index c966bb39..06d3f008 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -246,7 +246,6 @@ export default class Tokenizer {
             if (this._index > this.sectionStart) {
                 this.cbs.ontext(this.getSection());
             }
-            this.baseState = State.Text;
             this._state = State.BeforeEntity;
             this.sectionStart = this._index;
         }
@@ -306,7 +305,6 @@ export default class Tokenizer {
                     if (this._index > this.sectionStart) {
                         this.cbs.ontext(this.getSection());
                     }
-                    this.baseState = State.InSpecialTag;
                     this._state = State.BeforeEntity;
                     this.sectionStart = this._index;
                 }