diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 66c077bd..06d3f008 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -26,11 +26,8 @@ const enum CharCodes {
Eq = 0x3d, // "="
Gt = 0x3e, // ">"
Questionmark = 0x3f, // "?"
- UpperC = 0x43, // "C"
LowerC = 0x63, // "c"
- UpperS = 0x53, // "S"
LowerS = 0x73, // "s"
- UpperT = 0x54, // "T"
LowerT = 0x74, // "t"
UpperA = 0x41, // "A"
LowerA = 0x61, // "a"
@@ -38,6 +35,7 @@ const enum CharCodes {
LowerF = 0x66, // "f"
UpperZ = 0x5a, // "Z"
LowerZ = 0x7a, // "z"
+ LowerX = 0x78, // "x"
OpeningSquareBracket = 0x5b, // "["
ClosingSquareBracket = 0x5d, // "]"
}
@@ -68,58 +66,16 @@ const enum State {
// Processing instructions
InProcessingInstruction, // ?
- // Comments
+ // Comments & CDATA
BeforeComment,
- InComment,
+ CDATASequence,
InSpecialComment,
- AfterComment1,
- AfterComment2,
-
- // Cdata
- BeforeCdata1, // [
- BeforeCdata2, // C
- BeforeCdata3, // D
- BeforeCdata4, // A
- BeforeCdata5, // T
- BeforeCdata6, // A
- InCdata, // [
- AfterCdata1, // ]
- AfterCdata2, // ]
+ InCommentLike,
// Special tags
- BeforeSpecialS, // S
- BeforeSpecialSEnd, // S
-
- BeforeScript1, // C
- BeforeScript2, // R
- BeforeScript3, // I
- BeforeScript4, // P
- BeforeScript5, // T
- AfterScript1, // C
- AfterScript2, // R
- AfterScript3, // I
- AfterScript4, // P
- AfterScript5, // T
-
- BeforeStyle1, // T
- BeforeStyle2, // Y
- BeforeStyle3, // L
- BeforeStyle4, // E
- AfterStyle1, // T
- AfterStyle2, // Y
- AfterStyle3, // L
- AfterStyle4, // E
-
- BeforeSpecialT, // T
- BeforeSpecialTEnd, // T
- BeforeTitle1, // I
- BeforeTitle2, // T
- BeforeTitle3, // L
- BeforeTitle4, // E
- AfterTitle1, // I
- AfterTitle2, // T
- AfterTitle3, // L
- AfterTitle4, // E
+ BeforeSpecialS, // Decide if we deal with ` this.sectionStart) {
this.cbs.ontext(this.getSection());
}
this._state = State.BeforeTagName;
this.sectionStart = this._index;
- } else if (
- this.decodeEntities &&
- c === CharCodes.Amp &&
- (this.special === Special.None || this.special === Special.Title)
- ) {
+ } else if (this.decodeEntities && c === CharCodes.Amp) {
if (this._index > this.sectionStart) {
this.cbs.ontext(this.getSection());
}
- this.baseState = State.Text;
this._state = State.BeforeEntity;
this.sectionStart = this._index;
}
}
+ /*
+ * Matcher state shared by the handlers that compare input against a fixed
+ * sequence of char codes. `currentSequence` is the sequence being matched;
+ * it has no initial value and is assigned by whichever state starts a match
+ * (title/script/style tag names, CDATA and comment terminators).
+ */
+ private currentSequence!: Uint16Array;
+ private sequenceIndex = 0; // Position in `currentSequence` of the next char code to match.
+ private stateSpecialStartSequence(c: number) { // Matches the remainder of a possibly-special opening tag name.
+ const isEnd = this.sequenceIndex === this.currentSequence.length;
+ const isMatch = isEnd
+ ? // If we are at the end of the sequence, make sure the tag name has ended
+ isEndOfTagSection(c)
+ : // Otherwise, do a case-insensitive comparison (`| 0x20` lowercases ASCII letters)
+ (c | 0x20) === this.currentSequence[this.sequenceIndex];
+
+ if (!isMatch) {
+ this.isSpecial = false; // The name diverged from the sequence, so this is an ordinary tag.
+ } else if (!isEnd) {
+ this.sequenceIndex++; // Still inside the sequence; wait for the next character.
+ return;
+ }
+
+ this.sequenceIndex = 0; // Reached on a mismatch or on a complete match.
+ this._state = State.InTagName;
+ this.stateInTagName(c); // Reconsume the character as part of a regular tag name.
+ }
+
+ /**
+ * Look for an end tag. For `<title>` tags, also decode entities.
+ *
+ * Characters are matched case-insensitively against `currentSequence`.
+ * Once the whole sequence has been seen and is followed by `>` or
+ * whitespace, any text before it is emitted and the closing tag is handed
+ * to `stateInClosingTagName`.
+ */
+ private stateInSpecialTag(c: number) {
+ if (this.sequenceIndex === this.currentSequence.length) {
+ if (c === CharCodes.Gt || isWhitespace(c)) {
+ const endOfText = this._index - this.currentSequence.length;
+
+ if (this.sectionStart < endOfText) {
+ // Spoof the index so that reported locations match up.
+ const actualIndex = this._index;
+ this._index = endOfText;
+ this.cbs.ontext(this.getSection());
+ this._index = actualIndex;
+ }
+
+ this.isSpecial = false;
+ this.sectionStart = endOfText + 2; // Skip over the `</`
+ this.stateInClosingTagName(c);
+ return; // We are done; skip the rest of the function.
+ }
+
+ this.sequenceIndex = 0; // Not followed by `>` or whitespace, so this was not the end tag; start over.
+ }
+
+ if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
+ this.sequenceIndex += 1;
+ } else if (this.sequenceIndex === 0) {
+ if (this.currentSequence === Sequences.TitleEnd) {
+ // We have to parse entities in `<title>` tags.
+ if (this.decodeEntities && c === CharCodes.Amp) {
+ if (this._index > this.sectionStart) {
+ this.cbs.ontext(this.getSection());
+ }
+ this._state = State.BeforeEntity;
+ this.sectionStart = this._index;
+ }
+ } else if (this.fastForwardTo(CharCodes.Lt)) {
+ // Outside of `<title>` tags, we can fast-forward.
+ this.sequenceIndex = 1;
+ }
+ } else {
+ // If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`.
+ this.sequenceIndex = Number(c === CharCodes.Lt);
+ }
+ }
+ /**
+ * Match the characters that follow `<![` against `Sequences.Cdata`.
+ *
+ * The comparison is case-sensitive. On a full match, switch to
+ * `InCommentLike` with `Sequences.CdataEnd` as the terminator; on the first
+ * mismatch, fall back to treating the input as a declaration.
+ */
+ private stateCDATASequence(c: number) {
+ if (c === Sequences.Cdata[this.sequenceIndex]) {
+ if (++this.sequenceIndex === Sequences.Cdata.length) {
+ this._state = State.InCommentLike;
+ this.currentSequence = Sequences.CdataEnd;
+ this.sequenceIndex = 0;
+ this.sectionStart = this._index + 1; // The CDATA content starts after the current character.
+ }
+ } else {
+ this.sequenceIndex = 0;
+ this._state = State.InDeclaration;
+ this.stateInDeclaration(c); // Reconsume the character
+ }
+ }
+
+ /**
+ * When we wait for one specific character, we can speed things up
+ * by skipping through the buffer until we find it.
+ *
+ * The search starts at the character after the current index; the current
+ * character itself is never tested here. `_index` is advanced as a side
+ * effect: on success it points at the found character, on failure at the
+ * last character of the buffer.
+ *
+ * @param c Char code to search for.
+ * @returns Whether the character was found.
+ */
+ private fastForwardTo(c: number): boolean {
+ while (++this._index < this.buffer.length) {
+ if (this.buffer.charCodeAt(this._index) === c) {
+ return true;
+ }
+ }
+
+ /*
+ * We increment the index at the end of the `parse` loop,
+ * so set it to `buffer.length - 1` here.
+ *
+ * TODO: Refactor `parse` to increment index before calling states.
+ */
+ this._index = this.buffer.length - 1;
+
+ return false;
+ }
+
+ /**
+ * Comments and CDATA end with `-->` and `]]>`.
+ *
+ * Their common qualities are:
+ * - Their end sequences have a distinct character they start with.
+ * - That character is then repeated, so we have to check multiple repeats.
+ * - All characters but the start character of the sequence can be skipped.
+ *
+ * `currentSequence` selects which terminator is matched, and also which
+ * callback (`oncdata` or `oncomment`) receives the section once it is found.
+ */
+ private stateInCommentLike(c: number) {
+ if (c === this.currentSequence[this.sequenceIndex]) {
+ if (++this.sequenceIndex === this.currentSequence.length) {
+ // Remove 2 trailing chars (the `--` or `]]` before the current `>`).
+ // `slice` yields an empty string when the end precedes the start,
+ // which happens when the matcher was entered part-way through the sequence.
+ const section = this.buffer.slice(
+ this.sectionStart,
+ this._index - 2
+ );
+
+ if (this.currentSequence === Sequences.CdataEnd) {
+ this.cbs.oncdata(section);
+ } else {
+ this.cbs.oncomment(section);
+ }
+
+ this.sequenceIndex = 0;
+ this.sectionStart = this._index + 1;
+ this._state = State.Text;
+ }
+ } else if (this.sequenceIndex === 0) {
+ // Fast-forward to the first character of the sequence
+ if (this.fastForwardTo(this.currentSequence[0])) {
+ this.sequenceIndex = 1;
+ }
+ } else if (c !== this.currentSequence[this.sequenceIndex - 1]) {
+ // Allow long sequences, eg. --->, ]]]>
+ this.sequenceIndex = 0;
+ }
+ }
+
/**
* HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
*
@@ -389,9 +402,7 @@ export default class Tokenizer {
* We allow anything that wouldn't end the tag.
*/
private isTagStartChar(c: number) {
- return (
- isASCIIAlpha(c) || (this.xmlMode && !endOfTagSectionChars.has(c))
- );
+ return isASCIIAlpha(c) || (this.xmlMode && !isEndOfTagSection(c));
}
private stateBeforeTagName(c: number) {
if (c === CharCodes.Slash) {
@@ -399,11 +410,7 @@ export default class Tokenizer {
} else if (c === CharCodes.Lt) {
this.cbs.ontext(this.getSection());
this.sectionStart = this._index;
- } else if (
- c === CharCodes.Gt ||
- this.special !== Special.None ||
- whitespaceChars.has(c)
- ) {
+ } else if (c === CharCodes.Gt || isWhitespace(c)) {
this._state = State.Text;
} else if (c === CharCodes.ExclamationMark) {
this._state = State.BeforeDeclaration;
@@ -414,19 +421,23 @@ export default class Tokenizer {
} else if (!this.isTagStartChar(c)) {
this._state = State.Text;
} else {
- this._state =
- !this.xmlMode &&
- (c === CharCodes.LowerS || c === CharCodes.UpperS)
- ? State.BeforeSpecialS
- : !this.xmlMode &&
- (c === CharCodes.LowerT || c === CharCodes.UpperT)
- ? State.BeforeSpecialT
- : State.InTagName;
+ const lower = c | 0x20;
this.sectionStart = this._index;
+ if (!this.xmlMode && lower === CharCodes.LowerT) {
+ this.isSpecial = true;
+ this.currentSequence = Sequences.TitleEnd;
+ this.sequenceIndex = 3;
+ this._state = State.SpecialStartSequence;
+ } else {
+ this._state =
+ !this.xmlMode && lower === CharCodes.LowerS
+ ? State.BeforeSpecialS
+ : State.InTagName;
+ }
}
}
private stateInTagName(c: number) {
- if (endOfTagSectionChars.has(c)) {
+ if (isEndOfTagSection(c)) {
this.cbs.onopentagname(this.getSection());
this.sectionStart = -1;
this._state = State.BeforeAttributeName;
@@ -434,25 +445,10 @@ export default class Tokenizer {
}
}
private stateBeforeClosingTagName(c: number) {
- if (whitespaceChars.has(c)) {
+ if (isWhitespace(c)) {
// Ignore
} else if (c === CharCodes.Gt) {
this._state = State.Text;
- } else if (this.special !== Special.None) {
- if (
- this.special !== Special.Title &&
- (c === CharCodes.LowerS || c === CharCodes.UpperS)
- ) {
- this._state = State.BeforeSpecialSEnd;
- } else if (
- this.special === Special.Title &&
- (c === CharCodes.LowerT || c === CharCodes.UpperT)
- ) {
- this._state = State.BeforeSpecialTEnd;
- } else {
- this._state = State.Text;
- this.stateText(c);
- }
} else if (!this.isTagStartChar(c)) {
this._state = State.InSpecialComment;
this.sectionStart = this._index;
@@ -462,7 +458,7 @@ export default class Tokenizer {
}
}
private stateInClosingTagName(c: number) {
- if (c === CharCodes.Gt || whitespaceChars.has(c)) {
+ if (c === CharCodes.Gt || isWhitespace(c)) {
this.cbs.onclosetag(this.getSection());
this.sectionStart = -1;
this._state = State.AfterClosingTagName;
@@ -471,7 +467,7 @@ export default class Tokenizer {
}
private stateAfterClosingTagName(c: number) {
// Skip everything until ">"
- if (c === CharCodes.Gt) {
+ if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this._state = State.Text;
this.sectionStart = this._index + 1;
}
@@ -479,11 +475,17 @@ export default class Tokenizer {
private stateBeforeAttributeName(c: number) {
if (c === CharCodes.Gt) {
this.cbs.onopentagend();
- this._state = State.Text;
+ if (this.isSpecial) {
+ this._state = State.InSpecialTag;
+ this.sequenceIndex = 0;
+ } else {
+ this._state = State.Text;
+ }
+ this.baseState = this._state;
this.sectionStart = this._index + 1;
} else if (c === CharCodes.Slash) {
this._state = State.InSelfClosingTag;
- } else if (!whitespaceChars.has(c)) {
+ } else if (!isWhitespace(c)) {
this._state = State.InAttributeName;
this.sectionStart = this._index;
}
@@ -492,15 +494,16 @@ export default class Tokenizer {
if (c === CharCodes.Gt) {
this.cbs.onselfclosingtag();
this._state = State.Text;
+ this.baseState = State.Text;
this.sectionStart = this._index + 1;
- this.special = Special.None; // Reset special state, in case of self-closing special tags
- } else if (!whitespaceChars.has(c)) {
+ this.isSpecial = false; // Reset special state, in case of self-closing special tags
+ } else if (!isWhitespace(c)) {
this._state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
}
}
private stateInAttributeName(c: number) {
- if (c === CharCodes.Eq || endOfTagSectionChars.has(c)) {
+ if (c === CharCodes.Eq || isEndOfTagSection(c)) {
this.cbs.onattribname(this.getSection());
this.sectionStart = -1;
this._state = State.AfterAttributeName;
@@ -514,7 +517,7 @@ export default class Tokenizer {
this.cbs.onattribend(undefined);
this._state = State.BeforeAttributeName;
this.stateBeforeAttributeName(c);
- } else if (!whitespaceChars.has(c)) {
+ } else if (!isWhitespace(c)) {
this.cbs.onattribend(undefined);
this._state = State.InAttributeName;
this.sectionStart = this._index;
@@ -527,14 +530,17 @@ export default class Tokenizer {
} else if (c === CharCodes.SingleQuote) {
this._state = State.InAttributeValueSq;
this.sectionStart = this._index + 1;
- } else if (!whitespaceChars.has(c)) {
+ } else if (!isWhitespace(c)) {
this.sectionStart = this._index;
this._state = State.InAttributeValueNq;
this.stateInAttributeValueNoQuotes(c); // Reconsume token
}
}
private handleInAttributeValue(c: number, quote: number) {
- if (c === quote) {
+ if (
+ c === quote ||
+ (!this.decodeEntities && this.fastForwardTo(quote))
+ ) {
this.cbs.onattribdata(this.getSection());
this.sectionStart = -1;
this.cbs.onattribend(String.fromCharCode(quote));
@@ -553,7 +559,7 @@ export default class Tokenizer {
this.handleInAttributeValue(c, CharCodes.SingleQuote);
}
private stateInAttributeValueNoQuotes(c: number) {
- if (whitespaceChars.has(c) || c === CharCodes.Gt) {
+ if (isWhitespace(c) || c === CharCodes.Gt) {
this.cbs.onattribdata(this.getSection());
this.sectionStart = -1;
this.cbs.onattribend(null);
@@ -567,22 +573,25 @@ export default class Tokenizer {
}
}
private stateBeforeDeclaration(c: number) {
- this._state =
- c === CharCodes.OpeningSquareBracket
- ? State.BeforeCdata1
- : c === CharCodes.Dash
- ? State.BeforeComment
- : State.InDeclaration;
+ if (c === CharCodes.OpeningSquareBracket) {
+ this._state = State.CDATASequence;
+ this.sequenceIndex = 0;
+ } else {
+ this._state =
+ c === CharCodes.Dash
+ ? State.BeforeComment
+ : State.InDeclaration;
+ }
}
private stateInDeclaration(c: number) {
- if (c === CharCodes.Gt) {
+ if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.ondeclaration(this.getSection());
this._state = State.Text;
this.sectionStart = this._index + 1;
}
}
private stateInProcessingInstruction(c: number) {
- if (c === CharCodes.Gt) {
+ if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.onprocessinginstruction(this.getSection());
this._state = State.Text;
this.sectionStart = this._index + 1;
@@ -590,111 +599,39 @@ export default class Tokenizer {
}
private stateBeforeComment(c: number) {
if (c === CharCodes.Dash) {
- this._state = State.InComment;
+ this._state = State.InCommentLike;
+ this.currentSequence = Sequences.CommentEnd;
+ // Allow short comments (eg. `<!-->`)
+ this.sequenceIndex = 2;
this.sectionStart = this._index + 1;
} else {
this._state = State.InDeclaration;
}
}
- private stateInComment(c: number) {
- if (c === CharCodes.Dash) this._state = State.AfterComment1;
- }
private stateInSpecialComment(c: number) {
- if (c === CharCodes.Gt) {
+ if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.oncomment(this.getSection());
this._state = State.Text;
this.sectionStart = this._index + 1;
}
}
- private stateAfterComment1(c: number) {
- if (c === CharCodes.Dash) {
- this._state = State.AfterComment2;
- } else {
- this._state = State.InComment;
- }
- }
- private stateAfterComment2(c: number) {
- if (c === CharCodes.Gt) {
- // Remove 2 trailing chars
- this.cbs.oncomment(
- this.buffer.substring(this.sectionStart, this._index - 2)
- );
- this._state = State.Text;
- this.sectionStart = this._index + 1;
- } else if (c !== CharCodes.Dash) {
- this._state = State.InComment;
- }
- // Else: stay in AFTER_COMMENT_2 (`--->`)
- }
- private stateBeforeCdata6(c: number) {
- if (c === CharCodes.OpeningSquareBracket) {
- this._state = State.InCdata;
- this.sectionStart = this._index + 1;
- } else {
- this._state = State.InDeclaration;
- this.stateInDeclaration(c);
- }
- }
- private stateInCdata(c: number) {
- if (c === CharCodes.ClosingSquareBracket)
- this._state = State.AfterCdata1;
- }
- private stateAfterCdata1(c: number) {
- if (c === CharCodes.ClosingSquareBracket)
- this._state = State.AfterCdata2;
- else this._state = State.InCdata;
- }
- private stateAfterCdata2(c: number) {
- if (c === CharCodes.Gt) {
- // Remove 2 trailing chars
- this.cbs.oncdata(
- this.buffer.substring(this.sectionStart, this._index - 2)
- );
- this._state = State.Text;
- this.sectionStart = this._index + 1;
- } else if (c !== CharCodes.ClosingSquareBracket) {
- this._state = State.InCdata;
- }
- // Else: stay in AFTER_CDATA_2 (`]]]>`)
- }
private stateBeforeSpecialS(c: number) {
- if (c === CharCodes.LowerC || c === CharCodes.UpperC) {
- this._state = State.BeforeScript1;
- } else if (c === CharCodes.LowerT || c === CharCodes.UpperT) {
- this._state = State.BeforeStyle1;
+ const lower = c | 0x20;
+ if (lower === CharCodes.LowerC) {
+ this.isSpecial = true;
+ this.currentSequence = Sequences.ScriptEnd;
+ this.sequenceIndex = 4;
+ this._state = State.SpecialStartSequence;
+ } else if (lower === CharCodes.LowerT) {
+ this.isSpecial = true;
+ this.currentSequence = Sequences.StyleEnd;
+ this.sequenceIndex = 4;
+ this._state = State.SpecialStartSequence;
} else {
this._state = State.InTagName;
this.stateInTagName(c); // Consume the token again
}
}
- private stateBeforeSpecialSEnd(c: number) {
- if (
- this.special === Special.Script &&
- (c === CharCodes.LowerC || c === CharCodes.UpperC)
- ) {
- this._state = State.AfterScript1;
- } else if (
- this.special === Special.Style &&
- (c === CharCodes.LowerT || c === CharCodes.UpperT)
- ) {
- this._state = State.AfterStyle1;
- } else this._state = State.Text;
- }
- private stateBeforeSpecialLast(c: number, special: Special) {
- if (endOfTagSectionChars.has(c)) {
- this.special = special;
- }
- this._state = State.InTagName;
- this.stateInTagName(c); // Consume the token again
- }
- private stateAfterSpecialLast(c: number, sectionStartOffset: number) {
- if (c === CharCodes.Gt || whitespaceChars.has(c)) {
- this.sectionStart = this._index - sectionStartOffset;
- this.special = Special.None;
- this._state = State.InClosingTagName;
- this.stateInClosingTagName(c); // Reconsume the token
- } else this._state = State.Text;
- }
private trieIndex = 0;
private trieCurrent = 0;
@@ -768,6 +705,15 @@ export default class Tokenizer {
this._state = this.baseState;
}
+ private stateBeforeNumericEntity(c: number) { // Chooses between hex and decimal numeric entities; presumably reached right after `&#` (TODO confirm against `stateBeforeEntity`).
+ if ((c | 0x20) === CharCodes.LowerX) { // `| 0x20` lowercases ASCII letters, so this accepts `x` and `X`.
+ this._state = State.InHexEntity;
+ } else {
+ this._state = State.InNumericEntity;
+ this.stateInNumericEntity(c); // Reconsume the character in the numeric-entity state.
+ }
+ }
+
private decodeNumericEntity(base: 10 | 16, strict: boolean) {
const sectionStart = this.sectionStart + 2 + (base >> 4);
if (sectionStart !== this._index) {
@@ -809,7 +755,11 @@ export default class Tokenizer {
}
private allowLegacyEntity() {
- return !this.xmlMode && this.baseState === State.Text;
+ return (
+ !this.xmlMode &&
+ (this.baseState === State.Text ||
+ this.baseState === State.InSpecialTag)
+ );
}
/**
@@ -819,8 +769,10 @@ export default class Tokenizer {
// If we are inside of text, emit what we already have.
if (
this.running &&
- this._state === State.Text &&
- this.sectionStart !== this._index
+ this.sectionStart !== this._index &&
+ (this._state === State.Text ||
+ (this._state === State.InSpecialTag &&
+ this.sequenceIndex === 0))
) {
// TODO: We could emit attribute data here as well.
this.cbs.ontext(this.buffer.substr(this.sectionStart));
@@ -838,22 +790,32 @@ export default class Tokenizer {
}
}
+ private shouldContinue() { // Loop guard for `parse`: unread input remains and the `running` flag is set.
+ return this._index < this.buffer.length && this.running;
+ }
+
/**
* Iterates through the buffer, calling the function corresponding to the current state.
*
* States that are more likely to be hit are higher up, as a performance improvement.
*/
private parse() {
- while (this._index < this.buffer.length && this.running) {
+ while (this.shouldContinue()) {
const c = this.buffer.charCodeAt(this._index);
if (this._state === State.Text) {
this.stateText(c);
+ } else if (this._state === State.SpecialStartSequence) {
+ this.stateSpecialStartSequence(c);
+ } else if (this._state === State.InSpecialTag) {
+ this.stateInSpecialTag(c);
+ } else if (this._state === State.CDATASequence) {
+ this.stateCDATASequence(c);
} else if (this._state === State.InAttributeValueDq) {
this.stateInAttributeValueDoubleQuotes(c);
} else if (this._state === State.InAttributeName) {
this.stateInAttributeName(c);
- } else if (this._state === State.InComment) {
- this.stateInComment(c);
+ } else if (this._state === State.InCommentLike) {
+ this.stateInCommentLike(c);
} else if (this._state === State.InSpecialComment) {
this.stateInSpecialComment(c);
} else if (this._state === State.BeforeAttributeName) {
@@ -876,8 +838,6 @@ export default class Tokenizer {
this.stateAfterClosingTagName(c);
} else if (this._state === State.BeforeSpecialS) {
this.stateBeforeSpecialS(c);
- } else if (this._state === State.AfterComment1) {
- this.stateAfterComment1(c);
} else if (this._state === State.InAttributeValueNq) {
this.stateInAttributeValueNoQuotes(c);
} else if (this._state === State.InSelfClosingTag) {
@@ -886,99 +846,21 @@ export default class Tokenizer {
this.stateInDeclaration(c);
} else if (this._state === State.BeforeDeclaration) {
this.stateBeforeDeclaration(c);
- } else if (this._state === State.AfterComment2) {
- this.stateAfterComment2(c);
} else if (this._state === State.BeforeComment) {
this.stateBeforeComment(c);
- } else if (this._state === State.BeforeSpecialSEnd) {
- this.stateBeforeSpecialSEnd(c);
- } else if (this._state === State.BeforeSpecialTEnd) {
- stateBeforeSpecialTEnd(this, c);
- } else if (this._state === State.AfterScript1) {
- stateAfterScript1(this, c);
- } else if (this._state === State.AfterScript2) {
- stateAfterScript2(this, c);
- } else if (this._state === State.AfterScript3) {
- stateAfterScript3(this, c);
- } else if (this._state === State.BeforeScript1) {
- stateBeforeScript1(this, c);
- } else if (this._state === State.BeforeScript2) {
- stateBeforeScript2(this, c);
- } else if (this._state === State.BeforeScript3) {
- stateBeforeScript3(this, c);
- } else if (this._state === State.BeforeScript4) {
- stateBeforeScript4(this, c);
- } else if (this._state === State.BeforeScript5) {
- this.stateBeforeSpecialLast(c, Special.Script);
- } else if (this._state === State.AfterScript4) {
- stateAfterScript4(this, c);
- } else if (this._state === State.AfterScript5) {
- this.stateAfterSpecialLast(c, 6);
- } else if (this._state === State.BeforeStyle1) {
- stateBeforeStyle1(this, c);
- } else if (this._state === State.InCdata) {
- this.stateInCdata(c);
- } else if (this._state === State.BeforeStyle2) {
- stateBeforeStyle2(this, c);
- } else if (this._state === State.BeforeStyle3) {
- stateBeforeStyle3(this, c);
- } else if (this._state === State.BeforeStyle4) {
- this.stateBeforeSpecialLast(c, Special.Style);
- } else if (this._state === State.AfterStyle1) {
- stateAfterStyle1(this, c);
- } else if (this._state === State.AfterStyle2) {
- stateAfterStyle2(this, c);
- } else if (this._state === State.AfterStyle3) {
- stateAfterStyle3(this, c);
- } else if (this._state === State.AfterStyle4) {
- this.stateAfterSpecialLast(c, 5);
- } else if (this._state === State.BeforeSpecialT) {
- stateBeforeSpecialT(this, c);
- } else if (this._state === State.BeforeTitle1) {
- stateBeforeTitle1(this, c);
- } else if (this._state === State.BeforeTitle2) {
- stateBeforeTitle2(this, c);
- } else if (this._state === State.BeforeTitle3) {
- stateBeforeTitle3(this, c);
- } else if (this._state === State.BeforeTitle4) {
- this.stateBeforeSpecialLast(c, Special.Title);
- } else if (this._state === State.AfterTitle1) {
- stateAfterTitle1(this, c);
- } else if (this._state === State.AfterTitle2) {
- stateAfterTitle2(this, c);
- } else if (this._state === State.AfterTitle3) {
- stateAfterTitle3(this, c);
- } else if (this._state === State.AfterTitle4) {
- this.stateAfterSpecialLast(c, 5);
} else if (this._state === State.InProcessingInstruction) {
this.stateInProcessingInstruction(c);
} else if (this._state === State.InNamedEntity) {
this.stateInNamedEntity(c);
- } else if (this._state === State.BeforeCdata1) {
- stateBeforeCdata1(this, c);
} else if (this._state === State.BeforeEntity) {
this.stateBeforeEntity(c);
- } else if (this._state === State.BeforeCdata2) {
- stateBeforeCdata2(this, c);
- } else if (this._state === State.BeforeCdata3) {
- stateBeforeCdata3(this, c);
- } else if (this._state === State.AfterCdata1) {
- this.stateAfterCdata1(c);
- } else if (this._state === State.AfterCdata2) {
- this.stateAfterCdata2(c);
- } else if (this._state === State.BeforeCdata4) {
- stateBeforeCdata4(this, c);
- } else if (this._state === State.BeforeCdata5) {
- stateBeforeCdata5(this, c);
- } else if (this._state === State.BeforeCdata6) {
- this.stateBeforeCdata6(c);
} else if (this._state === State.InHexEntity) {
this.stateInHexEntity(c);
} else if (this._state === State.InNumericEntity) {
this.stateInNumericEntity(c);
} else {
// `this._state === State.BeforeNumericEntity`
- stateBeforeNumericEntity(this, c);
+ this.stateBeforeNumericEntity(c);
}
this._index++;
}
@@ -996,18 +878,12 @@ export default class Tokenizer {
/** Handle any trailing data. */
private handleTrailingData() {
const data = this.buffer.substr(this.sectionStart);
- if (
- this._state === State.InCdata ||
- this._state === State.AfterCdata1 ||
- this._state === State.AfterCdata2
- ) {
- this.cbs.oncdata(data);
- } else if (
- this._state === State.InComment ||
- this._state === State.AfterComment1 ||
- this._state === State.AfterComment2
- ) {
- this.cbs.oncomment(data);
+ if (this._state === State.InCommentLike) {
+ if (this.currentSequence === Sequences.CdataEnd) {
+ this.cbs.oncdata(data);
+ } else {
+ this.cbs.oncomment(data);
+ }
} else if (this._state === State.InNamedEntity && !this.xmlMode) {
// Increase excess for EOF
this.trieExcess++;
@@ -1046,7 +922,10 @@ export default class Tokenizer {
return this.buffer.substring(this.sectionStart, this._index);
}
private emitPartial(value: string) {
- if (this.baseState !== State.Text) {
+ if (
+ this.baseState !== State.Text &&
+ this.baseState !== State.InSpecialTag
+ ) {
this.cbs.onattribdata(value);
} else {
this.cbs.ontext(value);