You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2710 lines
96 KiB
2710 lines
96 KiB
import { Preprocessor } from './preprocessor.js'; |
|
import { CODE_POINTS as $, SEQUENCES as $$, REPLACEMENT_CHARACTER, isSurrogate, isUndefinedCodePoint, isControlCodePoint, } from '../common/unicode.js'; |
|
import { TokenType, getTokenAttr, } from '../common/token.js'; |
|
import { htmlDecodeTree, EntityDecoder, DecodingMode } from 'entities/lib/decode.js'; |
|
import { ERR } from '../common/error-codes.js'; |
|
import { TAG_ID, getTagID } from '../common/html.js'; |
|
//States |
|
/**
 * Tokenizer states. Built as a two-way map: `State.NAME` yields the numeric id and
 * `State[id]` yields the name. Ids are the positions in the list below (0..72),
 * so the order of this list must not change.
 */
const State = {};
[
    'DATA',
    'RCDATA',
    'RAWTEXT',
    'SCRIPT_DATA',
    'PLAINTEXT',
    'TAG_OPEN',
    'END_TAG_OPEN',
    'TAG_NAME',
    'RCDATA_LESS_THAN_SIGN',
    'RCDATA_END_TAG_OPEN',
    'RCDATA_END_TAG_NAME',
    'RAWTEXT_LESS_THAN_SIGN',
    'RAWTEXT_END_TAG_OPEN',
    'RAWTEXT_END_TAG_NAME',
    'SCRIPT_DATA_LESS_THAN_SIGN',
    'SCRIPT_DATA_END_TAG_OPEN',
    'SCRIPT_DATA_END_TAG_NAME',
    'SCRIPT_DATA_ESCAPE_START',
    'SCRIPT_DATA_ESCAPE_START_DASH',
    'SCRIPT_DATA_ESCAPED',
    'SCRIPT_DATA_ESCAPED_DASH',
    'SCRIPT_DATA_ESCAPED_DASH_DASH',
    'SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN',
    'SCRIPT_DATA_ESCAPED_END_TAG_OPEN',
    'SCRIPT_DATA_ESCAPED_END_TAG_NAME',
    'SCRIPT_DATA_DOUBLE_ESCAPE_START',
    'SCRIPT_DATA_DOUBLE_ESCAPED',
    'SCRIPT_DATA_DOUBLE_ESCAPED_DASH',
    'SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH',
    'SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN',
    'SCRIPT_DATA_DOUBLE_ESCAPE_END',
    'BEFORE_ATTRIBUTE_NAME',
    'ATTRIBUTE_NAME',
    'AFTER_ATTRIBUTE_NAME',
    'BEFORE_ATTRIBUTE_VALUE',
    'ATTRIBUTE_VALUE_DOUBLE_QUOTED',
    'ATTRIBUTE_VALUE_SINGLE_QUOTED',
    'ATTRIBUTE_VALUE_UNQUOTED',
    'AFTER_ATTRIBUTE_VALUE_QUOTED',
    'SELF_CLOSING_START_TAG',
    'BOGUS_COMMENT',
    'MARKUP_DECLARATION_OPEN',
    'COMMENT_START',
    'COMMENT_START_DASH',
    'COMMENT',
    'COMMENT_LESS_THAN_SIGN',
    'COMMENT_LESS_THAN_SIGN_BANG',
    'COMMENT_LESS_THAN_SIGN_BANG_DASH',
    'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH',
    'COMMENT_END_DASH',
    'COMMENT_END',
    'COMMENT_END_BANG',
    'DOCTYPE',
    'BEFORE_DOCTYPE_NAME',
    'DOCTYPE_NAME',
    'AFTER_DOCTYPE_NAME',
    'AFTER_DOCTYPE_PUBLIC_KEYWORD',
    'BEFORE_DOCTYPE_PUBLIC_IDENTIFIER',
    'DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED',
    'DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED',
    'AFTER_DOCTYPE_PUBLIC_IDENTIFIER',
    'BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS',
    'AFTER_DOCTYPE_SYSTEM_KEYWORD',
    'BEFORE_DOCTYPE_SYSTEM_IDENTIFIER',
    'DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED',
    'DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED',
    'AFTER_DOCTYPE_SYSTEM_IDENTIFIER',
    'BOGUS_DOCTYPE',
    'CDATA_SECTION',
    'CDATA_SECTION_BRACKET',
    'CDATA_SECTION_END',
    'CHARACTER_REFERENCE',
    'AMBIGUOUS_AMPERSAND',
].forEach((stateName, stateId) => {
    // Forward (name -> id) and reverse (id -> name) entries.
    State[stateName] = stateId;
    State[stateId] = stateName;
});
|
//Tokenizer initial states for different modes |
|
/**
 * The subset of tokenizer states that are exposed as initial modes.
 * Each entry holds the numeric id of the `State` member with the same name.
 */
export const TokenizerMode = (() => {
    const { DATA, RCDATA, RAWTEXT, SCRIPT_DATA, PLAINTEXT, CDATA_SECTION } = State;
    return { DATA, RCDATA, RAWTEXT, SCRIPT_DATA, PLAINTEXT, CDATA_SECTION };
})();
|
//Utils |
|
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
|
/** Returns true when `cp` lies in the inclusive range `$.DIGIT_0`..`$.DIGIT_9`. */
function isAsciiDigit(cp) {
    return $.DIGIT_0 <= cp && cp <= $.DIGIT_9;
}
|
/** Returns true when `cp` lies in the inclusive range `$.LATIN_CAPITAL_A`..`$.LATIN_CAPITAL_Z`. */
function isAsciiUpper(cp) {
    return $.LATIN_CAPITAL_A <= cp && cp <= $.LATIN_CAPITAL_Z;
}
|
/** Returns true when `cp` lies in the inclusive range `$.LATIN_SMALL_A`..`$.LATIN_SMALL_Z`. */
function isAsciiLower(cp) {
    return $.LATIN_SMALL_A <= cp && cp <= $.LATIN_SMALL_Z;
}
|
/** Returns true when `cp` is a lowercase or an uppercase ASCII letter. */
function isAsciiLetter(cp) {
    if (isAsciiLower(cp)) {
        return true;
    }
    return isAsciiUpper(cp);
}
|
/** Returns true when `cp` is an ASCII letter or an ASCII digit. */
function isAsciiAlphaNumeric(cp) {
    if (isAsciiLetter(cp)) {
        return true;
    }
    return isAsciiDigit(cp);
}
|
/**
 * Shifts a code point up by 0x20 (32).
 * No range check is performed here; the result is only a lowercase letter
 * when the caller has already established that `cp` is an uppercase ASCII letter.
 */
function toAsciiLower(cp) {
    return cp + 0x20;
}
|
/** Returns true when `cp` is a space, line feed, tabulation or form feed code point. */
function isWhitespace(cp) {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED:
            return true;
        default:
            return false;
    }
}
|
/** Returns true when `cp` is whitespace, a solidus (`/`) or a greater-than sign (`>`). */
function isScriptDataDoubleEscapeSequenceEnd(cp) {
    if (isWhitespace(cp)) {
        return true;
    }
    return cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN;
}
|
/**
 * Maps the code point of a numeric character reference to a parse error code.
 *
 * The checks run in a fixed order and the first match wins:
 * null, above 0x10FFFF, surrogate, undefined code point, control / carriage return.
 *
 * @param code Code point produced by the numeric character reference.
 * @returns The matching `ERR` code, or `null` when none of the checks match.
 */
function getErrorForNumericCharacterReference(code) {
    if (code === $.NULL) {
        return ERR.nullCharacterReference;
    }
    // 0x10FFFF === 1114111
    if (code > 0x10ffff) {
        return ERR.characterReferenceOutsideUnicodeRange;
    }
    if (isSurrogate(code)) {
        return ERR.surrogateCharacterReference;
    }
    if (isUndefinedCodePoint(code)) {
        return ERR.noncharacterCharacterReference;
    }
    if (isControlCodePoint(code) || code === $.CARRIAGE_RETURN) {
        return ERR.controlCharacterReference;
    }
    return null;
}
|
//Tokenizer |
|
export class Tokenizer { |
|
constructor(options, handler) { |
|
this.options = options; |
|
this.handler = handler; |
|
this.paused = false; |
|
/** Ensures that the parsing loop isn't run multiple times at once. */ |
|
this.inLoop = false; |
|
/** |
|
* Indicates that the current adjusted node exists, is not an element in the HTML namespace, |
|
* and that it is not an integration point for either MathML or HTML. |
|
* |
|
* @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction} |
|
*/ |
|
this.inForeignNode = false; |
|
this.lastStartTagName = ''; |
|
this.active = false; |
|
this.state = State.DATA; |
|
this.returnState = State.DATA; |
|
this.entityStartPos = 0; |
|
this.consumedAfterSnapshot = -1; |
|
this.currentCharacterToken = null; |
|
this.currentToken = null; |
|
this.currentAttr = { name: '', value: '' }; |
|
this.preprocessor = new Preprocessor(handler); |
|
this.currentLocation = this.getCurrentLocation(-1); |
|
this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => { |
|
// Note: Set `pos` _before_ flushing, as flushing might drop |
|
// the current chunk and invalidate `entityStartPos`. |
|
this.preprocessor.pos = this.entityStartPos + consumed - 1; |
|
this._flushCodePointConsumedAsCharacterReference(cp); |
|
}, handler.onParseError |
|
? { |
|
missingSemicolonAfterCharacterReference: () => { |
|
this._err(ERR.missingSemicolonAfterCharacterReference, 1); |
|
}, |
|
absenceOfDigitsInNumericCharacterReference: (consumed) => { |
|
this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed); |
|
}, |
|
validateNumericCharacterReference: (code) => { |
|
const error = getErrorForNumericCharacterReference(code); |
|
if (error) |
|
this._err(error, 1); |
|
}, |
|
} |
|
: undefined); |
|
} |
|
//Errors |
|
_err(code, cpOffset = 0) { |
|
var _a, _b; |
|
(_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset)); |
|
} |
|
// NOTE: `offset` may never run across line boundaries. |
|
getCurrentLocation(offset) { |
|
if (!this.options.sourceCodeLocationInfo) { |
|
return null; |
|
} |
|
return { |
|
startLine: this.preprocessor.line, |
|
startCol: this.preprocessor.col - offset, |
|
startOffset: this.preprocessor.offset - offset, |
|
endLine: -1, |
|
endCol: -1, |
|
endOffset: -1, |
|
}; |
|
} |
|
_runParsingLoop() { |
|
if (this.inLoop) |
|
return; |
|
this.inLoop = true; |
|
while (this.active && !this.paused) { |
|
this.consumedAfterSnapshot = 0; |
|
const cp = this._consume(); |
|
if (!this._ensureHibernation()) { |
|
this._callState(cp); |
|
} |
|
} |
|
this.inLoop = false; |
|
} |
|
//API |
|
pause() { |
|
this.paused = true; |
|
} |
|
resume(writeCallback) { |
|
if (!this.paused) { |
|
throw new Error('Parser was already resumed'); |
|
} |
|
this.paused = false; |
|
// Necessary for synchronous resume. |
|
if (this.inLoop) |
|
return; |
|
this._runParsingLoop(); |
|
if (!this.paused) { |
|
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); |
|
} |
|
} |
|
write(chunk, isLastChunk, writeCallback) { |
|
this.active = true; |
|
this.preprocessor.write(chunk, isLastChunk); |
|
this._runParsingLoop(); |
|
if (!this.paused) { |
|
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); |
|
} |
|
} |
|
insertHtmlAtCurrentPos(chunk) { |
|
this.active = true; |
|
this.preprocessor.insertHtmlAtCurrentPos(chunk); |
|
this._runParsingLoop(); |
|
} |
|
//Hibernation |
|
_ensureHibernation() { |
|
if (this.preprocessor.endOfChunkHit) { |
|
this.preprocessor.retreat(this.consumedAfterSnapshot); |
|
this.consumedAfterSnapshot = 0; |
|
this.active = false; |
|
return true; |
|
} |
|
return false; |
|
} |
|
//Consumption |
|
_consume() { |
|
this.consumedAfterSnapshot++; |
|
return this.preprocessor.advance(); |
|
} |
|
_advanceBy(count) { |
|
this.consumedAfterSnapshot += count; |
|
for (let i = 0; i < count; i++) { |
|
this.preprocessor.advance(); |
|
} |
|
} |
|
_consumeSequenceIfMatch(pattern, caseSensitive) { |
|
if (this.preprocessor.startsWith(pattern, caseSensitive)) { |
|
// We will already have consumed one character before calling this method. |
|
this._advanceBy(pattern.length - 1); |
|
return true; |
|
} |
|
return false; |
|
} |
|
//Token creation |
|
_createStartTagToken() { |
|
this.currentToken = { |
|
type: TokenType.START_TAG, |
|
tagName: '', |
|
tagID: TAG_ID.UNKNOWN, |
|
selfClosing: false, |
|
ackSelfClosing: false, |
|
attrs: [], |
|
location: this.getCurrentLocation(1), |
|
}; |
|
} |
|
_createEndTagToken() { |
|
this.currentToken = { |
|
type: TokenType.END_TAG, |
|
tagName: '', |
|
tagID: TAG_ID.UNKNOWN, |
|
selfClosing: false, |
|
ackSelfClosing: false, |
|
attrs: [], |
|
location: this.getCurrentLocation(2), |
|
}; |
|
} |
|
_createCommentToken(offset) { |
|
this.currentToken = { |
|
type: TokenType.COMMENT, |
|
data: '', |
|
location: this.getCurrentLocation(offset), |
|
}; |
|
} |
|
_createDoctypeToken(initialName) { |
|
this.currentToken = { |
|
type: TokenType.DOCTYPE, |
|
name: initialName, |
|
forceQuirks: false, |
|
publicId: null, |
|
systemId: null, |
|
location: this.currentLocation, |
|
}; |
|
} |
|
_createCharacterToken(type, chars) { |
|
this.currentCharacterToken = { |
|
type, |
|
chars, |
|
location: this.currentLocation, |
|
}; |
|
} |
|
//Tag attributes |
|
_createAttr(attrNameFirstCh) { |
|
this.currentAttr = { |
|
name: attrNameFirstCh, |
|
value: '', |
|
}; |
|
this.currentLocation = this.getCurrentLocation(0); |
|
} |
|
_leaveAttrName() { |
|
var _a; |
|
var _b; |
|
const token = this.currentToken; |
|
if (getTokenAttr(token, this.currentAttr.name) === null) { |
|
token.attrs.push(this.currentAttr); |
|
if (token.location && this.currentLocation) { |
|
const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null))); |
|
attrLocations[this.currentAttr.name] = this.currentLocation; |
|
// Set end location |
|
this._leaveAttrValue(); |
|
} |
|
} |
|
else { |
|
this._err(ERR.duplicateAttribute); |
|
} |
|
} |
|
_leaveAttrValue() { |
|
if (this.currentLocation) { |
|
this.currentLocation.endLine = this.preprocessor.line; |
|
this.currentLocation.endCol = this.preprocessor.col; |
|
this.currentLocation.endOffset = this.preprocessor.offset; |
|
} |
|
} |
|
//Token emission |
|
prepareToken(ct) { |
|
this._emitCurrentCharacterToken(ct.location); |
|
this.currentToken = null; |
|
if (ct.location) { |
|
ct.location.endLine = this.preprocessor.line; |
|
ct.location.endCol = this.preprocessor.col + 1; |
|
ct.location.endOffset = this.preprocessor.offset + 1; |
|
} |
|
this.currentLocation = this.getCurrentLocation(-1); |
|
} |
|
emitCurrentTagToken() { |
|
const ct = this.currentToken; |
|
this.prepareToken(ct); |
|
ct.tagID = getTagID(ct.tagName); |
|
if (ct.type === TokenType.START_TAG) { |
|
this.lastStartTagName = ct.tagName; |
|
this.handler.onStartTag(ct); |
|
} |
|
else { |
|
if (ct.attrs.length > 0) { |
|
this._err(ERR.endTagWithAttributes); |
|
} |
|
if (ct.selfClosing) { |
|
this._err(ERR.endTagWithTrailingSolidus); |
|
} |
|
this.handler.onEndTag(ct); |
|
} |
|
this.preprocessor.dropParsedChunk(); |
|
} |
|
emitCurrentComment(ct) { |
|
this.prepareToken(ct); |
|
this.handler.onComment(ct); |
|
this.preprocessor.dropParsedChunk(); |
|
} |
|
emitCurrentDoctype(ct) { |
|
this.prepareToken(ct); |
|
this.handler.onDoctype(ct); |
|
this.preprocessor.dropParsedChunk(); |
|
} |
|
_emitCurrentCharacterToken(nextLocation) { |
|
if (this.currentCharacterToken) { |
|
//NOTE: if we have a pending character token, make it's end location equal to the |
|
//current token's start location. |
|
if (nextLocation && this.currentCharacterToken.location) { |
|
this.currentCharacterToken.location.endLine = nextLocation.startLine; |
|
this.currentCharacterToken.location.endCol = nextLocation.startCol; |
|
this.currentCharacterToken.location.endOffset = nextLocation.startOffset; |
|
} |
|
switch (this.currentCharacterToken.type) { |
|
case TokenType.CHARACTER: { |
|
this.handler.onCharacter(this.currentCharacterToken); |
|
break; |
|
} |
|
case TokenType.NULL_CHARACTER: { |
|
this.handler.onNullCharacter(this.currentCharacterToken); |
|
break; |
|
} |
|
case TokenType.WHITESPACE_CHARACTER: { |
|
this.handler.onWhitespaceCharacter(this.currentCharacterToken); |
|
break; |
|
} |
|
} |
|
this.currentCharacterToken = null; |
|
} |
|
} |
|
_emitEOFToken() { |
|
const location = this.getCurrentLocation(0); |
|
if (location) { |
|
location.endLine = location.startLine; |
|
location.endCol = location.startCol; |
|
location.endOffset = location.startOffset; |
|
} |
|
this._emitCurrentCharacterToken(location); |
|
this.handler.onEof({ type: TokenType.EOF, location }); |
|
this.active = false; |
|
} |
|
//Characters emission |
|
//OPTIMIZATION: The specification uses only one type of character token (one token per character).
//This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
//If we have a sequence of characters that belong to the same group, the parser can process it
//as a single solid character token.
//So, there are 3 types of character tokens in parse5:
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
//3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
|
_appendCharToCurrentCharacterToken(type, ch) { |
|
if (this.currentCharacterToken) { |
|
if (this.currentCharacterToken.type === type) { |
|
this.currentCharacterToken.chars += ch; |
|
return; |
|
} |
|
else { |
|
this.currentLocation = this.getCurrentLocation(0); |
|
this._emitCurrentCharacterToken(this.currentLocation); |
|
this.preprocessor.dropParsedChunk(); |
|
} |
|
} |
|
this._createCharacterToken(type, ch); |
|
} |
|
_emitCodePoint(cp) { |
|
const type = isWhitespace(cp) |
|
? TokenType.WHITESPACE_CHARACTER |
|
: cp === $.NULL |
|
? TokenType.NULL_CHARACTER |
|
: TokenType.CHARACTER; |
|
this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp)); |
|
} |
|
//NOTE: used when we emit characters explicitly. |
|
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks. |
|
_emitChars(ch) { |
|
this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch); |
|
} |
|
// Character reference helpers |
|
_startCharacterReference() { |
|
this.returnState = this.state; |
|
this.state = State.CHARACTER_REFERENCE; |
|
this.entityStartPos = this.preprocessor.pos; |
|
this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy); |
|
} |
|
_isCharacterReferenceInAttribute() { |
|
return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED || |
|
this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED || |
|
this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED); |
|
} |
|
_flushCodePointConsumedAsCharacterReference(cp) { |
|
if (this._isCharacterReferenceInAttribute()) { |
|
this.currentAttr.value += String.fromCodePoint(cp); |
|
} |
|
else { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
// Calling states this way turns out to be much faster than any other approach. |
|
_callState(cp) { |
|
switch (this.state) { |
|
case State.DATA: { |
|
this._stateData(cp); |
|
break; |
|
} |
|
case State.RCDATA: { |
|
this._stateRcdata(cp); |
|
break; |
|
} |
|
case State.RAWTEXT: { |
|
this._stateRawtext(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA: { |
|
this._stateScriptData(cp); |
|
break; |
|
} |
|
case State.PLAINTEXT: { |
|
this._statePlaintext(cp); |
|
break; |
|
} |
|
case State.TAG_OPEN: { |
|
this._stateTagOpen(cp); |
|
break; |
|
} |
|
case State.END_TAG_OPEN: { |
|
this._stateEndTagOpen(cp); |
|
break; |
|
} |
|
case State.TAG_NAME: { |
|
this._stateTagName(cp); |
|
break; |
|
} |
|
case State.RCDATA_LESS_THAN_SIGN: { |
|
this._stateRcdataLessThanSign(cp); |
|
break; |
|
} |
|
case State.RCDATA_END_TAG_OPEN: { |
|
this._stateRcdataEndTagOpen(cp); |
|
break; |
|
} |
|
case State.RCDATA_END_TAG_NAME: { |
|
this._stateRcdataEndTagName(cp); |
|
break; |
|
} |
|
case State.RAWTEXT_LESS_THAN_SIGN: { |
|
this._stateRawtextLessThanSign(cp); |
|
break; |
|
} |
|
case State.RAWTEXT_END_TAG_OPEN: { |
|
this._stateRawtextEndTagOpen(cp); |
|
break; |
|
} |
|
case State.RAWTEXT_END_TAG_NAME: { |
|
this._stateRawtextEndTagName(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_LESS_THAN_SIGN: { |
|
this._stateScriptDataLessThanSign(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_END_TAG_OPEN: { |
|
this._stateScriptDataEndTagOpen(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_END_TAG_NAME: { |
|
this._stateScriptDataEndTagName(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPE_START: { |
|
this._stateScriptDataEscapeStart(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPE_START_DASH: { |
|
this._stateScriptDataEscapeStartDash(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPED: { |
|
this._stateScriptDataEscaped(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPED_DASH: { |
|
this._stateScriptDataEscapedDash(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPED_DASH_DASH: { |
|
this._stateScriptDataEscapedDashDash(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { |
|
this._stateScriptDataEscapedLessThanSign(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: { |
|
this._stateScriptDataEscapedEndTagOpen(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: { |
|
this._stateScriptDataEscapedEndTagName(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: { |
|
this._stateScriptDataDoubleEscapeStart(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED: { |
|
this._stateScriptDataDoubleEscaped(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { |
|
this._stateScriptDataDoubleEscapedDash(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { |
|
this._stateScriptDataDoubleEscapedDashDash(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { |
|
this._stateScriptDataDoubleEscapedLessThanSign(cp); |
|
break; |
|
} |
|
case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: { |
|
this._stateScriptDataDoubleEscapeEnd(cp); |
|
break; |
|
} |
|
case State.BEFORE_ATTRIBUTE_NAME: { |
|
this._stateBeforeAttributeName(cp); |
|
break; |
|
} |
|
case State.ATTRIBUTE_NAME: { |
|
this._stateAttributeName(cp); |
|
break; |
|
} |
|
case State.AFTER_ATTRIBUTE_NAME: { |
|
this._stateAfterAttributeName(cp); |
|
break; |
|
} |
|
case State.BEFORE_ATTRIBUTE_VALUE: { |
|
this._stateBeforeAttributeValue(cp); |
|
break; |
|
} |
|
case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: { |
|
this._stateAttributeValueDoubleQuoted(cp); |
|
break; |
|
} |
|
case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: { |
|
this._stateAttributeValueSingleQuoted(cp); |
|
break; |
|
} |
|
case State.ATTRIBUTE_VALUE_UNQUOTED: { |
|
this._stateAttributeValueUnquoted(cp); |
|
break; |
|
} |
|
case State.AFTER_ATTRIBUTE_VALUE_QUOTED: { |
|
this._stateAfterAttributeValueQuoted(cp); |
|
break; |
|
} |
|
case State.SELF_CLOSING_START_TAG: { |
|
this._stateSelfClosingStartTag(cp); |
|
break; |
|
} |
|
case State.BOGUS_COMMENT: { |
|
this._stateBogusComment(cp); |
|
break; |
|
} |
|
case State.MARKUP_DECLARATION_OPEN: { |
|
this._stateMarkupDeclarationOpen(cp); |
|
break; |
|
} |
|
case State.COMMENT_START: { |
|
this._stateCommentStart(cp); |
|
break; |
|
} |
|
case State.COMMENT_START_DASH: { |
|
this._stateCommentStartDash(cp); |
|
break; |
|
} |
|
case State.COMMENT: { |
|
this._stateComment(cp); |
|
break; |
|
} |
|
case State.COMMENT_LESS_THAN_SIGN: { |
|
this._stateCommentLessThanSign(cp); |
|
break; |
|
} |
|
case State.COMMENT_LESS_THAN_SIGN_BANG: { |
|
this._stateCommentLessThanSignBang(cp); |
|
break; |
|
} |
|
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: { |
|
this._stateCommentLessThanSignBangDash(cp); |
|
break; |
|
} |
|
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: { |
|
this._stateCommentLessThanSignBangDashDash(cp); |
|
break; |
|
} |
|
case State.COMMENT_END_DASH: { |
|
this._stateCommentEndDash(cp); |
|
break; |
|
} |
|
case State.COMMENT_END: { |
|
this._stateCommentEnd(cp); |
|
break; |
|
} |
|
case State.COMMENT_END_BANG: { |
|
this._stateCommentEndBang(cp); |
|
break; |
|
} |
|
case State.DOCTYPE: { |
|
this._stateDoctype(cp); |
|
break; |
|
} |
|
case State.BEFORE_DOCTYPE_NAME: { |
|
this._stateBeforeDoctypeName(cp); |
|
break; |
|
} |
|
case State.DOCTYPE_NAME: { |
|
this._stateDoctypeName(cp); |
|
break; |
|
} |
|
case State.AFTER_DOCTYPE_NAME: { |
|
this._stateAfterDoctypeName(cp); |
|
break; |
|
} |
|
case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: { |
|
this._stateAfterDoctypePublicKeyword(cp); |
|
break; |
|
} |
|
case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { |
|
this._stateBeforeDoctypePublicIdentifier(cp); |
|
break; |
|
} |
|
case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { |
|
this._stateDoctypePublicIdentifierDoubleQuoted(cp); |
|
break; |
|
} |
|
case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { |
|
this._stateDoctypePublicIdentifierSingleQuoted(cp); |
|
break; |
|
} |
|
case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { |
|
this._stateAfterDoctypePublicIdentifier(cp); |
|
break; |
|
} |
|
case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { |
|
this._stateBetweenDoctypePublicAndSystemIdentifiers(cp); |
|
break; |
|
} |
|
case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: { |
|
this._stateAfterDoctypeSystemKeyword(cp); |
|
break; |
|
} |
|
case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { |
|
this._stateBeforeDoctypeSystemIdentifier(cp); |
|
break; |
|
} |
|
case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { |
|
this._stateDoctypeSystemIdentifierDoubleQuoted(cp); |
|
break; |
|
} |
|
case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { |
|
this._stateDoctypeSystemIdentifierSingleQuoted(cp); |
|
break; |
|
} |
|
case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { |
|
this._stateAfterDoctypeSystemIdentifier(cp); |
|
break; |
|
} |
|
case State.BOGUS_DOCTYPE: { |
|
this._stateBogusDoctype(cp); |
|
break; |
|
} |
|
case State.CDATA_SECTION: { |
|
this._stateCdataSection(cp); |
|
break; |
|
} |
|
case State.CDATA_SECTION_BRACKET: { |
|
this._stateCdataSectionBracket(cp); |
|
break; |
|
} |
|
case State.CDATA_SECTION_END: { |
|
this._stateCdataSectionEnd(cp); |
|
break; |
|
} |
|
case State.CHARACTER_REFERENCE: { |
|
this._stateCharacterReference(); |
|
break; |
|
} |
|
case State.AMBIGUOUS_AMPERSAND: { |
|
this._stateAmbiguousAmpersand(cp); |
|
break; |
|
} |
|
default: { |
|
throw new Error('Unknown state'); |
|
} |
|
} |
|
} |
|
// State machine |
|
// Data state |
|
//------------------------------------------------------------------ |
|
_stateData(cp) { |
|
switch (cp) { |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.TAG_OPEN; |
|
break; |
|
} |
|
case $.AMPERSAND: { |
|
this._startCharacterReference(); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitCodePoint(cp); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// RCDATA state |
|
//------------------------------------------------------------------ |
|
_stateRcdata(cp) { |
|
switch (cp) { |
|
case $.AMPERSAND: { |
|
this._startCharacterReference(); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.RCDATA_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// RAWTEXT state |
|
//------------------------------------------------------------------ |
|
_stateRawtext(cp) { |
|
switch (cp) { |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.RAWTEXT_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data state |
|
//------------------------------------------------------------------ |
|
_stateScriptData(cp) { |
|
switch (cp) { |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// PLAINTEXT state |
|
//------------------------------------------------------------------ |
|
_statePlaintext(cp) { |
|
switch (cp) { |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Tag open state |
|
//------------------------------------------------------------------ |
|
_stateTagOpen(cp) { |
|
if (isAsciiLetter(cp)) { |
|
this._createStartTagToken(); |
|
this.state = State.TAG_NAME; |
|
this._stateTagName(cp); |
|
} |
|
else |
|
switch (cp) { |
|
case $.EXCLAMATION_MARK: { |
|
this.state = State.MARKUP_DECLARATION_OPEN; |
|
break; |
|
} |
|
case $.SOLIDUS: { |
|
this.state = State.END_TAG_OPEN; |
|
break; |
|
} |
|
case $.QUESTION_MARK: { |
|
this._err(ERR.unexpectedQuestionMarkInsteadOfTagName); |
|
this._createCommentToken(1); |
|
this.state = State.BOGUS_COMMENT; |
|
this._stateBogusComment(cp); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofBeforeTagName); |
|
this._emitChars('<'); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.invalidFirstCharacterOfTagName); |
|
this._emitChars('<'); |
|
this.state = State.DATA; |
|
this._stateData(cp); |
|
} |
|
} |
|
} |
|
// End tag open state |
|
//------------------------------------------------------------------ |
|
_stateEndTagOpen(cp) { |
|
if (isAsciiLetter(cp)) { |
|
this._createEndTagToken(); |
|
this.state = State.TAG_NAME; |
|
this._stateTagName(cp); |
|
} |
|
else |
|
switch (cp) { |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingEndTagName); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofBeforeTagName); |
|
this._emitChars('</'); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.invalidFirstCharacterOfTagName); |
|
this._createCommentToken(2); |
|
this.state = State.BOGUS_COMMENT; |
|
this._stateBogusComment(cp); |
|
} |
|
} |
|
} |
|
// Tag name state |
|
//------------------------------------------------------------------ |
|
_stateTagName(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this.state = State.BEFORE_ATTRIBUTE_NAME; |
|
break; |
|
} |
|
case $.SOLIDUS: { |
|
this.state = State.SELF_CLOSING_START_TAG; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentTagToken(); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.tagName += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); |
|
} |
|
} |
|
} |
|
// RCDATA less-than sign state |
|
//------------------------------------------------------------------ |
|
_stateRcdataLessThanSign(cp) { |
|
if (cp === $.SOLIDUS) { |
|
this.state = State.RCDATA_END_TAG_OPEN; |
|
} |
|
else { |
|
this._emitChars('<'); |
|
this.state = State.RCDATA; |
|
this._stateRcdata(cp); |
|
} |
|
} |
|
// RCDATA end tag open state |
|
//------------------------------------------------------------------ |
|
_stateRcdataEndTagOpen(cp) { |
|
if (isAsciiLetter(cp)) { |
|
this.state = State.RCDATA_END_TAG_NAME; |
|
this._stateRcdataEndTagName(cp); |
|
} |
|
else { |
|
this._emitChars('</'); |
|
this.state = State.RCDATA; |
|
this._stateRcdata(cp); |
|
} |
|
} |
|
handleSpecialEndTag(_cp) { |
|
if (!this.preprocessor.startsWith(this.lastStartTagName, false)) { |
|
return !this._ensureHibernation(); |
|
} |
|
this._createEndTagToken(); |
|
const token = this.currentToken; |
|
token.tagName = this.lastStartTagName; |
|
const cp = this.preprocessor.peek(this.lastStartTagName.length); |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this._advanceBy(this.lastStartTagName.length); |
|
this.state = State.BEFORE_ATTRIBUTE_NAME; |
|
return false; |
|
} |
|
case $.SOLIDUS: { |
|
this._advanceBy(this.lastStartTagName.length); |
|
this.state = State.SELF_CLOSING_START_TAG; |
|
return false; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._advanceBy(this.lastStartTagName.length); |
|
this.emitCurrentTagToken(); |
|
this.state = State.DATA; |
|
return false; |
|
} |
|
default: { |
|
return !this._ensureHibernation(); |
|
} |
|
} |
|
} |
|
// RCDATA end tag name state |
|
//------------------------------------------------------------------ |
|
_stateRcdataEndTagName(cp) { |
|
if (this.handleSpecialEndTag(cp)) { |
|
this._emitChars('</'); |
|
this.state = State.RCDATA; |
|
this._stateRcdata(cp); |
|
} |
|
} |
|
// RAWTEXT less-than sign state |
|
//------------------------------------------------------------------ |
|
_stateRawtextLessThanSign(cp) { |
|
if (cp === $.SOLIDUS) { |
|
this.state = State.RAWTEXT_END_TAG_OPEN; |
|
} |
|
else { |
|
this._emitChars('<'); |
|
this.state = State.RAWTEXT; |
|
this._stateRawtext(cp); |
|
} |
|
} |
|
// RAWTEXT end tag open state |
|
//------------------------------------------------------------------ |
|
_stateRawtextEndTagOpen(cp) { |
|
if (isAsciiLetter(cp)) { |
|
this.state = State.RAWTEXT_END_TAG_NAME; |
|
this._stateRawtextEndTagName(cp); |
|
} |
|
else { |
|
this._emitChars('</'); |
|
this.state = State.RAWTEXT; |
|
this._stateRawtext(cp); |
|
} |
|
} |
|
// RAWTEXT end tag name state |
|
//------------------------------------------------------------------ |
|
_stateRawtextEndTagName(cp) { |
|
if (this.handleSpecialEndTag(cp)) { |
|
this._emitChars('</'); |
|
this.state = State.RAWTEXT; |
|
this._stateRawtext(cp); |
|
} |
|
} |
|
// Script data less-than sign state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataLessThanSign(cp) { |
|
switch (cp) { |
|
case $.SOLIDUS: { |
|
this.state = State.SCRIPT_DATA_END_TAG_OPEN; |
|
break; |
|
} |
|
case $.EXCLAMATION_MARK: { |
|
this.state = State.SCRIPT_DATA_ESCAPE_START; |
|
this._emitChars('<!'); |
|
break; |
|
} |
|
default: { |
|
this._emitChars('<'); |
|
this.state = State.SCRIPT_DATA; |
|
this._stateScriptData(cp); |
|
} |
|
} |
|
} |
|
// Script data end tag open state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEndTagOpen(cp) { |
|
if (isAsciiLetter(cp)) { |
|
this.state = State.SCRIPT_DATA_END_TAG_NAME; |
|
this._stateScriptDataEndTagName(cp); |
|
} |
|
else { |
|
this._emitChars('</'); |
|
this.state = State.SCRIPT_DATA; |
|
this._stateScriptData(cp); |
|
} |
|
} |
|
// Script data end tag name state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEndTagName(cp) { |
|
if (this.handleSpecialEndTag(cp)) { |
|
this._emitChars('</'); |
|
this.state = State.SCRIPT_DATA; |
|
this._stateScriptData(cp); |
|
} |
|
} |
|
// Script data escape start state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapeStart(cp) { |
|
if (cp === $.HYPHEN_MINUS) { |
|
this.state = State.SCRIPT_DATA_ESCAPE_START_DASH; |
|
this._emitChars('-'); |
|
} |
|
else { |
|
this.state = State.SCRIPT_DATA; |
|
this._stateScriptData(cp); |
|
} |
|
} |
|
// Script data escape start dash state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapeStartDash(cp) { |
|
if (cp === $.HYPHEN_MINUS) { |
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; |
|
this._emitChars('-'); |
|
} |
|
else { |
|
this.state = State.SCRIPT_DATA; |
|
this._stateScriptData(cp); |
|
} |
|
} |
|
// Script data escaped state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscaped(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH; |
|
this._emitChars('-'); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInScriptHtmlCommentLikeText); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data escaped dash state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapedDash(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; |
|
this._emitChars('-'); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInScriptHtmlCommentLikeText); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data escaped dash dash state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapedDashDash(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this._emitChars('-'); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA; |
|
this._emitChars('>'); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInScriptHtmlCommentLikeText); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data escaped less-than sign state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapedLessThanSign(cp) { |
|
if (cp === $.SOLIDUS) { |
|
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN; |
|
} |
|
else if (isAsciiLetter(cp)) { |
|
this._emitChars('<'); |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START; |
|
this._stateScriptDataDoubleEscapeStart(cp); |
|
} |
|
else { |
|
this._emitChars('<'); |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._stateScriptDataEscaped(cp); |
|
} |
|
} |
|
// Script data escaped end tag open state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapedEndTagOpen(cp) { |
|
if (isAsciiLetter(cp)) { |
|
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME; |
|
this._stateScriptDataEscapedEndTagName(cp); |
|
} |
|
else { |
|
this._emitChars('</'); |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._stateScriptDataEscaped(cp); |
|
} |
|
} |
|
// Script data escaped end tag name state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataEscapedEndTagName(cp) { |
|
if (this.handleSpecialEndTag(cp)) { |
|
this._emitChars('</'); |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._stateScriptDataEscaped(cp); |
|
} |
|
} |
|
// Script data double escape start state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataDoubleEscapeStart(cp) { |
|
if (this.preprocessor.startsWith($$.SCRIPT, false) && |
|
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) { |
|
this._emitCodePoint(cp); |
|
for (let i = 0; i < $$.SCRIPT.length; i++) { |
|
this._emitCodePoint(this._consume()); |
|
} |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
} |
|
else if (!this._ensureHibernation()) { |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
this._stateScriptDataEscaped(cp); |
|
} |
|
} |
|
// Script data double escaped state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataDoubleEscaped(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH; |
|
this._emitChars('-'); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; |
|
this._emitChars('<'); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInScriptHtmlCommentLikeText); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data double escaped dash state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataDoubleEscapedDash(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH; |
|
this._emitChars('-'); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; |
|
this._emitChars('<'); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInScriptHtmlCommentLikeText); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data double escaped dash dash state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataDoubleEscapedDashDash(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this._emitChars('-'); |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; |
|
this._emitChars('<'); |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.SCRIPT_DATA; |
|
this._emitChars('>'); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
this._emitChars(REPLACEMENT_CHARACTER); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInScriptHtmlCommentLikeText); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Script data double escaped less-than sign state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataDoubleEscapedLessThanSign(cp) { |
|
if (cp === $.SOLIDUS) { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END; |
|
this._emitChars('/'); |
|
} |
|
else { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
this._stateScriptDataDoubleEscaped(cp); |
|
} |
|
} |
|
// Script data double escape end state |
|
//------------------------------------------------------------------ |
|
_stateScriptDataDoubleEscapeEnd(cp) { |
|
if (this.preprocessor.startsWith($$.SCRIPT, false) && |
|
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) { |
|
this._emitCodePoint(cp); |
|
for (let i = 0; i < $$.SCRIPT.length; i++) { |
|
this._emitCodePoint(this._consume()); |
|
} |
|
this.state = State.SCRIPT_DATA_ESCAPED; |
|
} |
|
else if (!this._ensureHibernation()) { |
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; |
|
this._stateScriptDataDoubleEscaped(cp); |
|
} |
|
} |
|
// Before attribute name state |
|
//------------------------------------------------------------------ |
|
_stateBeforeAttributeName(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.SOLIDUS: |
|
case $.GREATER_THAN_SIGN: |
|
case $.EOF: { |
|
this.state = State.AFTER_ATTRIBUTE_NAME; |
|
this._stateAfterAttributeName(cp); |
|
break; |
|
} |
|
case $.EQUALS_SIGN: { |
|
this._err(ERR.unexpectedEqualsSignBeforeAttributeName); |
|
this._createAttr('='); |
|
this.state = State.ATTRIBUTE_NAME; |
|
break; |
|
} |
|
default: { |
|
this._createAttr(''); |
|
this.state = State.ATTRIBUTE_NAME; |
|
this._stateAttributeName(cp); |
|
} |
|
} |
|
} |
|
// Attribute name state |
|
//------------------------------------------------------------------ |
|
_stateAttributeName(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: |
|
case $.SOLIDUS: |
|
case $.GREATER_THAN_SIGN: |
|
case $.EOF: { |
|
this._leaveAttrName(); |
|
this.state = State.AFTER_ATTRIBUTE_NAME; |
|
this._stateAfterAttributeName(cp); |
|
break; |
|
} |
|
case $.EQUALS_SIGN: { |
|
this._leaveAttrName(); |
|
this.state = State.BEFORE_ATTRIBUTE_VALUE; |
|
break; |
|
} |
|
case $.QUOTATION_MARK: |
|
case $.APOSTROPHE: |
|
case $.LESS_THAN_SIGN: { |
|
this._err(ERR.unexpectedCharacterInAttributeName); |
|
this.currentAttr.name += String.fromCodePoint(cp); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.currentAttr.name += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
default: { |
|
this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); |
|
} |
|
} |
|
} |
|
// After attribute name state |
|
//------------------------------------------------------------------ |
|
_stateAfterAttributeName(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.SOLIDUS: { |
|
this.state = State.SELF_CLOSING_START_TAG; |
|
break; |
|
} |
|
case $.EQUALS_SIGN: { |
|
this.state = State.BEFORE_ATTRIBUTE_VALUE; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentTagToken(); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._createAttr(''); |
|
this.state = State.ATTRIBUTE_NAME; |
|
this._stateAttributeName(cp); |
|
} |
|
} |
|
} |
|
// Before attribute value state |
|
//------------------------------------------------------------------ |
|
_stateBeforeAttributeValue(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingAttributeValue); |
|
this.state = State.DATA; |
|
this.emitCurrentTagToken(); |
|
break; |
|
} |
|
default: { |
|
this.state = State.ATTRIBUTE_VALUE_UNQUOTED; |
|
this._stateAttributeValueUnquoted(cp); |
|
} |
|
} |
|
} |
|
// Attribute value (double-quoted) state |
|
//------------------------------------------------------------------ |
|
_stateAttributeValueDoubleQuoted(cp) { |
|
switch (cp) { |
|
case $.QUOTATION_MARK: { |
|
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; |
|
break; |
|
} |
|
case $.AMPERSAND: { |
|
this._startCharacterReference(); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.currentAttr.value += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.currentAttr.value += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Attribute value (single-quoted) state |
|
//------------------------------------------------------------------ |
|
_stateAttributeValueSingleQuoted(cp) { |
|
switch (cp) { |
|
case $.APOSTROPHE: { |
|
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; |
|
break; |
|
} |
|
case $.AMPERSAND: { |
|
this._startCharacterReference(); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.currentAttr.value += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.currentAttr.value += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Attribute value (unquoted) state |
|
//------------------------------------------------------------------ |
|
_stateAttributeValueUnquoted(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this._leaveAttrValue(); |
|
this.state = State.BEFORE_ATTRIBUTE_NAME; |
|
break; |
|
} |
|
case $.AMPERSAND: { |
|
this._startCharacterReference(); |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._leaveAttrValue(); |
|
this.state = State.DATA; |
|
this.emitCurrentTagToken(); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this.currentAttr.value += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.QUOTATION_MARK: |
|
case $.APOSTROPHE: |
|
case $.LESS_THAN_SIGN: |
|
case $.EQUALS_SIGN: |
|
case $.GRAVE_ACCENT: { |
|
this._err(ERR.unexpectedCharacterInUnquotedAttributeValue); |
|
this.currentAttr.value += String.fromCodePoint(cp); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this.currentAttr.value += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// After attribute value (quoted) state |
|
//------------------------------------------------------------------ |
|
_stateAfterAttributeValueQuoted(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this._leaveAttrValue(); |
|
this.state = State.BEFORE_ATTRIBUTE_NAME; |
|
break; |
|
} |
|
case $.SOLIDUS: { |
|
this._leaveAttrValue(); |
|
this.state = State.SELF_CLOSING_START_TAG; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._leaveAttrValue(); |
|
this.state = State.DATA; |
|
this.emitCurrentTagToken(); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingWhitespaceBetweenAttributes); |
|
this.state = State.BEFORE_ATTRIBUTE_NAME; |
|
this._stateBeforeAttributeName(cp); |
|
} |
|
} |
|
} |
|
// Self-closing start tag state |
|
//------------------------------------------------------------------ |
|
_stateSelfClosingStartTag(cp) { |
|
switch (cp) { |
|
case $.GREATER_THAN_SIGN: { |
|
const token = this.currentToken; |
|
token.selfClosing = true; |
|
this.state = State.DATA; |
|
this.emitCurrentTagToken(); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInTag); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.unexpectedSolidusInTag); |
|
this.state = State.BEFORE_ATTRIBUTE_NAME; |
|
this._stateBeforeAttributeName(cp); |
|
} |
|
} |
|
} |
|
// Bogus comment state |
|
//------------------------------------------------------------------ |
|
_stateBogusComment(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentComment(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this.emitCurrentComment(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.data += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
default: { |
|
token.data += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Markup declaration open state |
|
//------------------------------------------------------------------ |
|
_stateMarkupDeclarationOpen(cp) { |
|
if (this._consumeSequenceIfMatch($$.DASH_DASH, true)) { |
|
this._createCommentToken($$.DASH_DASH.length + 1); |
|
this.state = State.COMMENT_START; |
|
} |
|
else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) { |
|
// NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here. |
|
this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1); |
|
this.state = State.DOCTYPE; |
|
} |
|
else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) { |
|
if (this.inForeignNode) { |
|
this.state = State.CDATA_SECTION; |
|
} |
|
else { |
|
this._err(ERR.cdataInHtmlContent); |
|
this._createCommentToken($$.CDATA_START.length + 1); |
|
this.currentToken.data = '[CDATA['; |
|
this.state = State.BOGUS_COMMENT; |
|
} |
|
} |
|
//NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup |
|
//results are no longer valid and we will need to start over. |
|
else if (!this._ensureHibernation()) { |
|
this._err(ERR.incorrectlyOpenedComment); |
|
this._createCommentToken(2); |
|
this.state = State.BOGUS_COMMENT; |
|
this._stateBogusComment(cp); |
|
} |
|
} |
|
// Comment start state |
|
//------------------------------------------------------------------ |
|
_stateCommentStart(cp) { |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.COMMENT_START_DASH; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.abruptClosingOfEmptyComment); |
|
this.state = State.DATA; |
|
const token = this.currentToken; |
|
this.emitCurrentComment(token); |
|
break; |
|
} |
|
default: { |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
} |
|
// Comment start dash state |
|
//------------------------------------------------------------------ |
|
_stateCommentStartDash(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.COMMENT_END; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.abruptClosingOfEmptyComment); |
|
this.state = State.DATA; |
|
this.emitCurrentComment(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInComment); |
|
this.emitCurrentComment(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.data += '-'; |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
} |
|
// Comment state |
|
//------------------------------------------------------------------ |
|
_stateComment(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.COMMENT_END_DASH; |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
token.data += '<'; |
|
this.state = State.COMMENT_LESS_THAN_SIGN; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.data += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInComment); |
|
this.emitCurrentComment(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.data += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// Comment less-than sign state |
|
//------------------------------------------------------------------ |
|
_stateCommentLessThanSign(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.EXCLAMATION_MARK: { |
|
token.data += '!'; |
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG; |
|
break; |
|
} |
|
case $.LESS_THAN_SIGN: { |
|
token.data += '<'; |
|
break; |
|
} |
|
default: { |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
} |
|
// Comment less-than sign bang state |
|
//------------------------------------------------------------------ |
|
_stateCommentLessThanSignBang(cp) { |
|
if (cp === $.HYPHEN_MINUS) { |
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH; |
|
} |
|
else { |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
// Comment less-than sign bang dash state |
|
//------------------------------------------------------------------ |
|
_stateCommentLessThanSignBangDash(cp) { |
|
if (cp === $.HYPHEN_MINUS) { |
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH; |
|
} |
|
else { |
|
this.state = State.COMMENT_END_DASH; |
|
this._stateCommentEndDash(cp); |
|
} |
|
} |
|
// Comment less-than sign bang dash dash state |
|
//------------------------------------------------------------------ |
|
_stateCommentLessThanSignBangDashDash(cp) { |
|
if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) { |
|
this._err(ERR.nestedComment); |
|
} |
|
this.state = State.COMMENT_END; |
|
this._stateCommentEnd(cp); |
|
} |
|
// Comment end dash state |
|
//------------------------------------------------------------------ |
|
_stateCommentEndDash(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
this.state = State.COMMENT_END; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInComment); |
|
this.emitCurrentComment(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.data += '-'; |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
} |
|
// Comment end state |
|
//------------------------------------------------------------------ |
|
_stateCommentEnd(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentComment(token); |
|
break; |
|
} |
|
case $.EXCLAMATION_MARK: { |
|
this.state = State.COMMENT_END_BANG; |
|
break; |
|
} |
|
case $.HYPHEN_MINUS: { |
|
token.data += '-'; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInComment); |
|
this.emitCurrentComment(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.data += '--'; |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
} |
|
// Comment end bang state |
|
//------------------------------------------------------------------ |
|
_stateCommentEndBang(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.HYPHEN_MINUS: { |
|
token.data += '--!'; |
|
this.state = State.COMMENT_END_DASH; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.incorrectlyClosedComment); |
|
this.state = State.DATA; |
|
this.emitCurrentComment(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInComment); |
|
this.emitCurrentComment(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.data += '--!'; |
|
this.state = State.COMMENT; |
|
this._stateComment(cp); |
|
} |
|
} |
|
} |
|
// DOCTYPE state |
|
//------------------------------------------------------------------ |
|
_stateDoctype(cp) { |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this.state = State.BEFORE_DOCTYPE_NAME; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.BEFORE_DOCTYPE_NAME; |
|
this._stateBeforeDoctypeName(cp); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
this._createDoctypeToken(null); |
|
const token = this.currentToken; |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingWhitespaceBeforeDoctypeName); |
|
this.state = State.BEFORE_DOCTYPE_NAME; |
|
this._stateBeforeDoctypeName(cp); |
|
} |
|
} |
|
} |
|
// Before DOCTYPE name state |
|
//------------------------------------------------------------------ |
|
_stateBeforeDoctypeName(cp) { |
|
if (isAsciiUpper(cp)) { |
|
this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp))); |
|
this.state = State.DOCTYPE_NAME; |
|
} |
|
else |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
this._createDoctypeToken(REPLACEMENT_CHARACTER); |
|
this.state = State.DOCTYPE_NAME; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingDoctypeName); |
|
this._createDoctypeToken(null); |
|
const token = this.currentToken; |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
this._createDoctypeToken(null); |
|
const token = this.currentToken; |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._createDoctypeToken(String.fromCodePoint(cp)); |
|
this.state = State.DOCTYPE_NAME; |
|
} |
|
} |
|
} |
|
// DOCTYPE name state |
|
//------------------------------------------------------------------ |
|
_stateDoctypeName(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this.state = State.AFTER_DOCTYPE_NAME; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.name += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); |
|
} |
|
} |
|
} |
|
// After DOCTYPE name state |
|
//------------------------------------------------------------------ |
|
_stateAfterDoctypeName(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
if (this._consumeSequenceIfMatch($$.PUBLIC, false)) { |
|
this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD; |
|
} |
|
else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) { |
|
this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD; |
|
} |
|
//NOTE: sequence lookup can be abrupted by hibernation. In that case lookup |
|
//results are no longer valid and we will need to start over. |
|
else if (!this._ensureHibernation()) { |
|
this._err(ERR.invalidCharacterSequenceAfterDoctypeName); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
} |
|
// After DOCTYPE public keyword state |
|
//------------------------------------------------------------------ |
|
_stateAfterDoctypePublicKeyword(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); |
|
token.publicId = ''; |
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); |
|
token.publicId = ''; |
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingDoctypePublicIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// Before DOCTYPE public identifier state |
|
//------------------------------------------------------------------ |
|
_stateBeforeDoctypePublicIdentifier(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
token.publicId = ''; |
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
token.publicId = ''; |
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingDoctypePublicIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// DOCTYPE public identifier (double-quoted) state |
|
//------------------------------------------------------------------ |
|
_stateDoctypePublicIdentifierDoubleQuoted(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.QUOTATION_MARK: { |
|
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.publicId += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.abruptDoctypePublicIdentifier); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.publicId += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// DOCTYPE public identifier (single-quoted) state |
|
//------------------------------------------------------------------ |
|
_stateDoctypePublicIdentifierSingleQuoted(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.APOSTROPHE: { |
|
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.publicId += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.abruptDoctypePublicIdentifier); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.publicId += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// After DOCTYPE public identifier state |
|
//------------------------------------------------------------------ |
|
_stateAfterDoctypePublicIdentifier(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// Between DOCTYPE public and system identifiers state |
|
//------------------------------------------------------------------ |
|
_stateBetweenDoctypePublicAndSystemIdentifiers(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// After DOCTYPE system keyword state |
|
//------------------------------------------------------------------ |
|
_stateAfterDoctypeSystemKeyword(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// Before DOCTYPE system identifier state |
|
//------------------------------------------------------------------ |
|
_stateBeforeDoctypeSystemIdentifier(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.QUOTATION_MARK: { |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; |
|
break; |
|
} |
|
case $.APOSTROPHE: { |
|
token.systemId = ''; |
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.missingDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.DATA; |
|
this.emitCurrentDoctype(token); |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// DOCTYPE system identifier (double-quoted) state |
|
//------------------------------------------------------------------ |
|
_stateDoctypeSystemIdentifierDoubleQuoted(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.QUOTATION_MARK: { |
|
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.systemId += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.abruptDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.systemId += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// DOCTYPE system identifier (single-quoted) state |
|
//------------------------------------------------------------------ |
|
_stateDoctypeSystemIdentifierSingleQuoted(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.APOSTROPHE: { |
|
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
token.systemId += REPLACEMENT_CHARACTER; |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this._err(ERR.abruptDoctypeSystemIdentifier); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
token.systemId += String.fromCodePoint(cp); |
|
} |
|
} |
|
} |
|
// After DOCTYPE system identifier state |
|
//------------------------------------------------------------------ |
|
_stateAfterDoctypeSystemIdentifier(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.SPACE: |
|
case $.LINE_FEED: |
|
case $.TABULATION: |
|
case $.FORM_FEED: { |
|
// Ignore whitespace |
|
break; |
|
} |
|
case $.GREATER_THAN_SIGN: { |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInDoctype); |
|
token.forceQuirks = true; |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier); |
|
this.state = State.BOGUS_DOCTYPE; |
|
this._stateBogusDoctype(cp); |
|
} |
|
} |
|
} |
|
// Bogus DOCTYPE state |
|
//------------------------------------------------------------------ |
|
_stateBogusDoctype(cp) { |
|
const token = this.currentToken; |
|
switch (cp) { |
|
case $.GREATER_THAN_SIGN: { |
|
this.emitCurrentDoctype(token); |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.NULL: { |
|
this._err(ERR.unexpectedNullCharacter); |
|
break; |
|
} |
|
case $.EOF: { |
|
this.emitCurrentDoctype(token); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: |
|
// Do nothing |
|
} |
|
} |
|
// CDATA section state |
|
//------------------------------------------------------------------ |
|
_stateCdataSection(cp) { |
|
switch (cp) { |
|
case $.RIGHT_SQUARE_BRACKET: { |
|
this.state = State.CDATA_SECTION_BRACKET; |
|
break; |
|
} |
|
case $.EOF: { |
|
this._err(ERR.eofInCdata); |
|
this._emitEOFToken(); |
|
break; |
|
} |
|
default: { |
|
this._emitCodePoint(cp); |
|
} |
|
} |
|
} |
|
// CDATA section bracket state |
|
//------------------------------------------------------------------ |
|
_stateCdataSectionBracket(cp) { |
|
if (cp === $.RIGHT_SQUARE_BRACKET) { |
|
this.state = State.CDATA_SECTION_END; |
|
} |
|
else { |
|
this._emitChars(']'); |
|
this.state = State.CDATA_SECTION; |
|
this._stateCdataSection(cp); |
|
} |
|
} |
|
// CDATA section end state |
|
//------------------------------------------------------------------ |
|
_stateCdataSectionEnd(cp) { |
|
switch (cp) { |
|
case $.GREATER_THAN_SIGN: { |
|
this.state = State.DATA; |
|
break; |
|
} |
|
case $.RIGHT_SQUARE_BRACKET: { |
|
this._emitChars(']'); |
|
break; |
|
} |
|
default: { |
|
this._emitChars(']]'); |
|
this.state = State.CDATA_SECTION; |
|
this._stateCdataSection(cp); |
|
} |
|
} |
|
} |
|
// Character reference state |
|
//------------------------------------------------------------------ |
|
_stateCharacterReference() { |
|
let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos); |
|
if (length < 0) { |
|
if (this.preprocessor.lastChunkWritten) { |
|
length = this.entityDecoder.end(); |
|
} |
|
else { |
|
// Wait for the rest of the entity. |
|
this.active = false; |
|
// Mark the entire buffer as read. |
|
this.preprocessor.pos = this.preprocessor.html.length - 1; |
|
this.consumedAfterSnapshot = 0; |
|
this.preprocessor.endOfChunkHit = true; |
|
return; |
|
} |
|
} |
|
if (length === 0) { |
|
// This was not a valid entity. Go back to the beginning, and |
|
// figure out what to do. |
|
this.preprocessor.pos = this.entityStartPos; |
|
this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); |
|
this.state = |
|
!this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1)) |
|
? State.AMBIGUOUS_AMPERSAND |
|
: this.returnState; |
|
} |
|
else { |
|
// We successfully parsed an entity. Switch to the return state. |
|
this.state = this.returnState; |
|
} |
|
} |
|
// Ambiguous ampersand state |
|
//------------------------------------------------------------------ |
|
_stateAmbiguousAmpersand(cp) { |
|
if (isAsciiAlphaNumeric(cp)) { |
|
this._flushCodePointConsumedAsCharacterReference(cp); |
|
} |
|
else { |
|
if (cp === $.SEMICOLON) { |
|
this._err(ERR.unknownNamedCharacterReference); |
|
} |
|
this.state = this.returnState; |
|
this._callState(cp); |
|
} |
|
} |
|
}
|
|
|