You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
211 lines
7.9 KiB
211 lines
7.9 KiB
import htmlDecodeTree from "./generated/decode-data-html.js"; |
|
import xmlDecodeTree from "./generated/decode-data-xml.js"; |
|
import decodeCodePoint from "./decode_codepoint.js"; |
|
export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint }; |
|
export { replaceCodePoint, fromCodePoint } from "./decode_codepoint.js"; |
|
/**
 * Bit masks over a 16-bit value. The three masks are disjoint and together
 * cover all 16 bits (0xC000 | 0x3F80 | 0x007F === 0xFFFF).
 *
 * NOTE(review): presumably applied to nodes of the `Uint16Array` decode tree;
 * the meaning of each field is inferred from its name — confirm against the
 * implementation.
 */
export declare enum BinTrieFlags { |
|
/** 0xC000 — bits 14-15. */
VALUE_LENGTH = 49152, |
|
/** 0x3F80 — bits 7-13. */
BRANCH_LENGTH = 16256, |
|
/** 0x007F — bits 0-6. */
JUMP_TABLE = 127 |
|
} |
|
/**
 * Selects how entity terminators are treated while decoding.
 *
 * Passed to `EntityDecoder.startEntity` and accepted as the optional `mode`
 * argument of `decodeHTML`.
 */
export declare enum DecodingMode { |
|
/** Entities in text nodes that can end with any character. */ |
|
Legacy = 0, |
|
/** Only allow entities terminated with a semicolon. */ |
|
Strict = 1, |
|
/** Entities in attributes have limitations on ending characters. */ |
|
Attribute = 2 |
|
} |
|
/** |
|
* Producers for character reference errors as defined in the HTML spec. |
|
* |
|
* An implementation can be supplied to `EntityDecoder` through the optional |
|
* `errors` constructor argument. |
|
*/ |
|
export interface EntityErrorProducer { |
|
/** NOTE(review): by its name, reports a character reference that lacks a terminating semicolon — confirm call sites. */
missingSemicolonAfterCharacterReference(): void; |
|
/** NOTE(review): by its name, reports a numeric reference without digits; `consumedCharacters` is presumably the count consumed so far — confirm. */
absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void; |
|
/** NOTE(review): by its name, receives the numeric value (`code`) of a numeric character reference for validation — confirm. */
validateNumericCharacterReference(code: number): void; |
|
} |
|
/** |
|
* Token decoder with support of writing partial entities. |
|
* |
|
* Call `startEntity` to reset the instance, feed input with `write` (which |
|
* returns -1 while the entity is still incomplete), and call `end` once the |
|
* end of the input has been reached. |
|
*/ |
|
export declare class EntityDecoder { |
|
/** The tree used to decode entities. */ |
|
private readonly decodeTree; |
|
/** |
|
* The function that is called when a codepoint is decoded. |
|
* |
|
* For multi-byte named entities, this will be called multiple times, |
|
* with the second codepoint, and the same `consumed` value. |
|
* |
|
* @param codepoint The decoded codepoint. |
|
* @param consumed The number of bytes consumed by the decoder. |
|
*/ |
|
private readonly emitCodePoint; |
|
/** An object that is used to produce errors. */ |
|
private readonly errors?; |
|
constructor( |
|
/** The tree used to decode entities. */ |
|
decodeTree: Uint16Array, |
|
/** |
|
* The function that is called when a codepoint is decoded. |
|
* |
|
* For multi-byte named entities, this will be called multiple times, |
|
* with the second codepoint, and the same `consumed` value. |
|
* |
|
* @param codepoint The decoded codepoint. |
|
* @param consumed The number of bytes consumed by the decoder. |
|
*/ |
|
emitCodePoint: (cp: number, consumed: number) => void, |
|
/** An object that is used to produce errors. */ |
|
errors?: EntityErrorProducer | undefined); |
|
/** The current state of the decoder. */ |
|
private state; |
|
/** Characters that were consumed while parsing an entity. */ |
|
private consumed; |
|
/** |
|
* The result of the entity. |
|
* |
|
* Either the result index of a named entity, or the codepoint of a |
|
* numeric entity. |
|
*/ |
|
private result; |
|
/** The current index in the decode tree. */ |
|
private treeIndex; |
|
/** The number of characters that were consumed in excess. */ |
|
private excess; |
|
/** The mode in which the decoder is operating. */ |
|
private decodeMode; |
|
/** Resets the instance to make it reusable. */ |
|
startEntity(decodeMode: DecodingMode): void; |
|
/** |
|
* Write an entity to the decoder. This can be called multiple times with partial entities. |
|
* If the entity is incomplete, the decoder will return -1. |
|
* |
|
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the |
|
* entity is incomplete, and resume when the next string is written. |
|
* |
|
* @param str The string containing the entity (or a continuation of the entity). |
|
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call. |
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. |
|
*/ |
|
write(str: string, offset: number): number; |
|
/** |
|
* Switches between the numeric decimal and hexadecimal states. |
|
* |
|
* Equivalent to the `Numeric character reference state` in the HTML spec. |
|
* |
|
* @param str The string containing the entity (or a continuation of the entity). |
|
* @param offset The current offset. |
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. |
|
*/ |
|
private stateNumericStart; |
|
/** NOTE(review): undocumented in this declaration; presumably folds parsed digits into `result` for the numeric states — confirm against the implementation. */
private addToNumericResult; |
|
/** |
|
* Parses a hexadecimal numeric entity. |
|
* |
|
* Equivalent to the `Hexadecimal character reference state` in the HTML spec. |
|
* |
|
* @param str The string containing the entity (or a continuation of the entity). |
|
* @param offset The current offset. |
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. |
|
*/ |
|
private stateNumericHex; |
|
/** |
|
* Parses a decimal numeric entity. |
|
* |
|
* Equivalent to the `Decimal character reference state` in the HTML spec. |
|
* |
|
* @param str The string containing the entity (or a continuation of the entity). |
|
* @param offset The current offset. |
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. |
|
*/ |
|
private stateNumericDecimal; |
|
/** |
|
* Validate and emit a numeric entity. |
|
* |
|
* Implements the logic from the `Hexadecimal character reference start |
|
* state` and `Numeric character reference end state` in the HTML spec. |
|
* |
|
* @param lastCp The last code point of the entity. Used to see if the |
|
* entity was terminated with a semicolon. |
|
* @param expectedLength The minimum number of characters that should be |
|
* consumed. Used to validate that at least one digit |
|
* was consumed. |
|
* @returns The number of characters that were consumed. |
|
*/ |
|
private emitNumericEntity; |
|
/** |
|
* Parses a named entity. |
|
* |
|
* Equivalent to the `Named character reference state` in the HTML spec. |
|
* |
|
* @param str The string containing the entity (or a continuation of the entity). |
|
* @param offset The current offset. |
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete. |
|
*/ |
|
private stateNamedEntity; |
|
/** |
|
* Emit a named entity that was not terminated with a semicolon. |
|
* |
|
* @returns The number of characters consumed. |
|
*/ |
|
private emitNotTerminatedNamedEntity; |
|
/** |
|
* Emit a named entity. |
|
* |
|
* @param result The index of the entity in the decode tree. |
|
* @param valueLength The number of bytes in the entity. |
|
* @param consumed The number of characters consumed. |
|
* |
|
* @returns The number of characters consumed. |
|
*/ |
|
private emitNamedEntityData; |
|
/** |
|
* Signal to the parser that the end of the input was reached. |
|
* |
|
* Remaining data will be emitted and relevant errors will be produced. |
|
* |
|
* @returns The number of characters consumed. |
|
*/ |
|
end(): number; |
|
} |
|
/** |
|
* Determines the branch of the current node that is taken given the current |
|
* character. This function is used to traverse the trie. |
|
* |
|
* A result of -1 means no branch matched, so callers must check for it before |
|
* using the result as an index into `decodeTree`. |
|
* |
|
* @param decodeTree The trie. |
|
* @param current The current node. |
|
* @param nodeIdx The index right after the current node and its value. |
|
* @param char The current character, passed as a number rather than a string. |
|
* @returns The index of the next node, or -1 if no branch is taken. |
|
*/ |
|
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number; |
|
/** |
|
* Decodes an HTML string. |
|
* |
|
* @param str The string to decode. |
|
* @param mode The decoding mode. Optional; the default used when it is omitted |
|
* is chosen by the implementation and is not visible in this declaration. |
|
* @returns The decoded string. |
|
*/ |
|
export declare function decodeHTML(str: string, mode?: DecodingMode): string; |
|
/** |
|
* Decodes an HTML string in an attribute. |
|
* |
|
* NOTE(review): presumably the `DecodingMode.Attribute` counterpart of |
|
* `decodeHTML` — confirm against the implementation. |
|
* |
|
* @param str The string to decode. |
|
* @returns The decoded string. |
|
*/ |
|
export declare function decodeHTMLAttribute(str: string): string; |
|
/** |
|
* Decodes an HTML string, requiring all entities to be terminated by a semicolon. |
|
* |
|
* Unlike `decodeHTML`, this function takes no `mode` argument. |
|
* NOTE(review): presumably the `DecodingMode.Strict` counterpart of |
|
* `decodeHTML` — confirm against the implementation. |
|
* |
|
* @param str The string to decode. |
|
* @returns The decoded string. |
|
*/ |
|
export declare function decodeHTMLStrict(str: string): string; |
|
/** |
|
* Decodes an XML string, requiring all entities to be terminated by a semicolon. |
|
* |
|
* NOTE(review): presumably decodes against the XML entity tree that this |
|
* module re-exports as `xmlDecodeTree` — confirm against the implementation. |
|
* |
|
* @param str The string to decode. |
|
* @returns The decoded string. |
|
*/ |
|
export declare function decodeXML(str: string): string; |
|
//# sourceMappingURL=decode.d.ts.map
|