?class-wp-html-unsupported-exception.php000064400000007026147333266670014360 0ustar00token_name = $token_name; $this->token_at = $token_at; $this->token = $token; $this->stack_of_open_elements = $stack_of_open_elements; $this->active_formatting_elements = $active_formatting_elements; } } class-wp-html-tag-processor.php000064400000436444147333266670012576 0ustar00 "c" not " c". * This would increase the size of the changes for some operations but leave more * natural-looking output HTML. * * @package WordPress * @subpackage HTML-API * @since 6.2.0 */ /** * Core class used to modify attributes in an HTML document for tags matching a query. * * ## Usage * * Use of this class requires three steps: * * 1. Create a new class instance with your input HTML document. * 2. Find the tag(s) you are looking for. * 3. Request changes to the attributes in those tag(s). * * Example: * * $tags = new WP_HTML_Tag_Processor( $html ); * if ( $tags->next_tag( 'option' ) ) { * $tags->set_attribute( 'selected', true ); * } * * ### Finding tags * * The `next_tag()` function moves the internal cursor through * your input HTML document until it finds a tag meeting any of * the supplied restrictions in the optional query argument. If * no argument is provided then it will find the next HTML tag, * regardless of what kind it is. * * If you want to _find whatever the next tag is_: * * $tags->next_tag(); * * | Goal | Query | * |-----------------------------------------------------------|---------------------------------------------------------------------------------| * | Find any tag. | `$tags->next_tag();` | * | Find next image tag. | `$tags->next_tag( array( 'tag_name' => 'img' ) );` | * | Find next image tag (without passing the array). | `$tags->next_tag( 'img' );` | * | Find next tag containing the `fullwidth` CSS class. | `$tags->next_tag( array( 'class_name' => 'fullwidth' ) );` | * | Find next image tag containing the `fullwidth` CSS class. 
| `$tags->next_tag( array( 'tag_name' => 'img', 'class_name' => 'fullwidth' ) );` | * * If a tag was found meeting your criteria then `next_tag()` * will return `true` and you can proceed to modify it. If it * returns `false`, however, it failed to find the tag and * moved the cursor to the end of the file. * * Once the cursor reaches the end of the file the processor * is done and if you want to reach an earlier tag you will * need to recreate the processor and start over, as it's * unable to back up or move in reverse. * * See the section on bookmarks for an exception to this * no-backing-up rule. * * #### Custom queries * * Sometimes it's necessary to further inspect an HTML tag than * the query syntax here permits. In these cases one may further * inspect the search results using the read-only functions * provided by the processor or external state or variables. * * Example: * * // Paint up to the first five DIV or SPAN tags marked with the "jazzy" style. * $remaining_count = 5; * while ( $remaining_count > 0 && $tags->next_tag() ) { * if ( * ( 'DIV' === $tags->get_tag() || 'SPAN' === $tags->get_tag() ) && * 'jazzy' === $tags->get_attribute( 'data-style' ) * ) { * $tags->add_class( 'theme-style-everest-jazz' ); * $remaining_count--; * } * } * * `get_attribute()` will return `null` if the attribute wasn't present * on the tag when it was called. It may return `""` (the empty string) * in cases where the attribute was present but its value was empty. * For boolean attributes, those whose name is present but no value is * given, it will return `true` (the only way to set `false` for an * attribute is to remove it). * * #### When matching fails * * When `next_tag()` returns `false` it could mean different things: * * - The requested tag wasn't found in the input document. * - The input document ended in the middle of an HTML syntax element. * * When a document ends in the middle of a syntax element it will pause * the processor. 
This is to make it possible in the future to extend the * input document and proceed - an important requirement for chunked * streaming parsing of a document. * * Example: * * $processor = new WP_HTML_Tag_Processor( 'This
` inside an HTML comment. * - STYLE content is raw text. * - TITLE content is plain text but character references are decoded. * - TEXTAREA content is plain text but character references are decoded. * - XMP (deprecated) content is raw text. * * ### Modifying HTML attributes for a found tag * * Once you've found the start of an opening tag you can modify * any number of the attributes on that tag. You can set a new * value for an attribute, remove the entire attribute, or do * nothing and move on to the next opening tag. * * Example: * * if ( $tags->next_tag( array( 'class_name' => 'wp-group-block' ) ) ) { * $tags->set_attribute( 'title', 'This groups the contained content.' ); * $tags->remove_attribute( 'data-test-id' ); * } * * If `set_attribute()` is called for an existing attribute it will * overwrite the existing value. Similarly, calling `remove_attribute()` * for a non-existing attribute has no effect on the document. Both * of these methods are safe to call without knowing if a given attribute * exists beforehand. * * ### Modifying CSS classes for a found tag * * The tag processor treats the `class` attribute as a special case. * Because it's a common operation to add or remove CSS classes, this * interface adds helper methods to make that easier. * * As with attribute values, adding or removing CSS classes is a safe * operation that doesn't require checking if the attribute or class * exists before making changes. If removing the only class then the * entire `class` attribute will be removed. 
* * Example: * * // from `<span>Yippee!</span>` * // to `<span class="is-active">Yippee!</span>` * $tags->add_class( 'is-active' ); * * // from `<span class="excited">Yippee!</span>` * // to `<span class="excited is-active">Yippee!</span>` * $tags->add_class( 'is-active' ); * * // from `<span class="is-active heavy-accent">Yippee!</span>` * // to `<span class="is-active heavy-accent">Yippee!</span>` * $tags->add_class( 'is-active' ); * * // from `<input type="text" class="is-active rugby not-disabled" length="24">` * // to `<input type="text" class="is-active not-disabled" length="24">` * $tags->remove_class( 'rugby' ); * * // from `<input type="text" class="rugby" length="24">` * // to `<input type="text" length="24">` * $tags->remove_class( 'rugby' ); * * // from `<input type="text" length="24">` * // to `<input type="text" length="24">` * $tags->remove_class( 'rugby' ); * * When class changes are enqueued but a direct change to `class` is made via * `set_attribute` then the changes to `set_attribute` (or `remove_attribute`) * will take precedence over those made through `add_class` and `remove_class`. * * ### Bookmarks * * While scanning through the input HTML document it's possible to set * a named bookmark when a particular tag is found. Later on, after * continuing to scan other tags, it's possible to `seek` to one of * the set bookmarks and then proceed again from that point forward. * * Because bookmarks create processing overhead one should avoid * creating too many of them. As a rule, create only bookmarks * of known string literal names; avoid creating "mark_{$index}" * and so on. It's fine from a performance standpoint to create a * bookmark and update it frequently, such as within a loop. * * $total_todos = 0; * while ( $p->next_tag( array( 'tag_name' => 'UL', 'class_name' => 'todo' ) ) ) { * $p->set_bookmark( 'list-start' ); * while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) { * if ( 'UL' === $p->get_tag() && $p->is_tag_closer() ) { * $p->set_bookmark( 'list-end' ); * $p->seek( 'list-start' ); * $p->set_attribute( 'data-contained-todos', (string) $total_todos ); * $total_todos = 0; * $p->seek( 'list-end' ); * break; * } * * if ( 'LI' === $p->get_tag() && ! $p->is_tag_closer() ) { * $total_todos++; * } * } * } * * ## Tokens and finer-grained processing. * * It's possible to scan through every lexical token in the * HTML document using the `next_token()` function. 
This * alternative form takes no argument and provides no built-in * query syntax. * * Example: * * $title = '(untitled)'; * $text = ''; * while ( $processor->next_token() ) { * switch ( $processor->get_token_name() ) { * case '#text': * $text .= $processor->get_modifiable_text(); * break; * * case 'BR': * $text .= "\n"; * break; * * case 'TITLE': * $title = $processor->get_modifiable_text(); * break; * } * } * return trim( "# {$title}\n\n{$text}" ); * * ### Tokens and _modifiable text_. * * #### Special "atomic" HTML elements. * * Not all HTML elements are able to contain other elements inside of them. * For instance, the contents inside a TITLE element are plaintext (except * that character references like `&amp;` will be decoded). This means that * if the string `<img>` appears inside a TITLE element, then it's not an * image tag, but rather it's text describing an image tag. Likewise, the * contents of a SCRIPT or STYLE element are handled entirely separately in * a browser than the contents of other elements because they represent a * different language than HTML. * * For these elements the Tag Processor treats the entire sequence as one, * from the opening tag, including its contents, through its closing tag. * This means that it's not possible to match the closing tag for a * SCRIPT element unless it's unexpected; the Tag Processor already matched * it when it found the opening tag. * * The inner contents of these elements are that element's _modifiable text_. * * The special elements are: * - `SCRIPT` whose contents are treated as raw plaintext but supports a legacy * style of including JavaScript inside of HTML comments to avoid accidentally * closing the SCRIPT from inside a JavaScript string. E.g. `console.log( '<!--<script></script>-->' )`. * - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any * character references are decoded. E.g. `1 &lt; 2 &lt; 3` becomes `1 < 2 < 3`. 
* - `IFRAME`, `NOEMBED`, `NOFRAMES`, `STYLE`, `XMP` whose contents are treated as * raw plaintext and left as-is. E.g. `1 &lt; 2 &lt; 3` remains `1 &lt; 2 &lt; 3`. * * #### Other tokens with modifiable text. * * There are also non-elements which are void/self-closing in nature and contain * modifiable text that is part of that individual syntax token itself. * * - `#text` nodes, whose entire token _is_ the modifiable text. * - HTML comments and tokens that become comments due to some syntax error. The * text for these tokens is the portion of the comment inside of the syntax. * E.g. for `<!-- comment -->` the text is `" comment "` (note the spaces are included). * - `CDATA` sections, whose text is the content inside of the section itself. E.g. for * `<![CDATA[some content]]>` the text is `"some content"` (with restrictions [1]). * - "Funky comments," which are a special case of invalid closing tags whose name is * invalid. The text for these nodes is the text that a browser would transform into * an HTML comment when parsing. E.g. for `</%post_author>` the text is `%post_author`. * - `DOCTYPE` declarations like `<!DOCTYPE html>` which have no closing tag. * - XML Processing instruction nodes like `<?wp __( "Like" ); ?>` (with restrictions [2]). * - The empty end tag `</>` which is ignored in the browser and DOM. * * [1]: There are no CDATA sections in HTML. When encountering `<![CDATA[`, everything * until the next `>` becomes a bogus HTML comment, meaning there can be no CDATA * section in an HTML document containing `>`. The Tag Processor will first find * all valid and bogus HTML comments, and then if the comment _would_ have been a * CDATA section _were they to exist_, it will indicate this as the type of comment. * * [2]: XML allows a broader range of characters in a processing instruction's target name * and disallows "xml" as a name, since it's special. The Tag Processor only recognizes * target names with an ASCII-representable subset of characters. 
It also exhibits the * same constraint as with CDATA sections, in that `>` cannot exist within the token * since Processing Instructions do not exist within HTML and their syntax transforms * into a bogus comment in the DOM. * * ## Design and limitations * * The Tag Processor is designed to linearly scan HTML documents and tokenize * HTML tags and their attributes. It's designed to do this as efficiently as * possible without compromising parsing integrity. Therefore it will be * slower than some methods of modifying HTML, such as those incorporating * over-simplified PCRE patterns, but will not introduce the defects and * failures that those methods bring in, which lead to broken page renders * and often to security vulnerabilities. On the other hand, it will be faster * than full-blown HTML parsers such as DOMDocument and use considerably * less memory. It requires a negligible memory overhead, enough to consider * it a zero-overhead system. * * The performance characteristics are maintained by avoiding tree construction * and semantic cleanups which are specified in HTML5. Because of this, for * example, it's not possible for the Tag Processor to associate any given * opening tag with its corresponding closing tag, or to return the inner markup * inside an element. Systems may be built on top of the Tag Processor to do * this, but the Tag Processor is and should be constrained so it can remain an * efficient, low-level, and reliable HTML scanner. * * The Tag Processor's design incorporates a "garbage-in-garbage-out" philosophy. * HTML5 specifies that certain invalid content be transformed into different forms * for display, such as removing null bytes from an input document and replacing * invalid characters with the Unicode replacement character `U+FFFD` (visually "�"). * Where errors or transformations exist within the HTML5 specification, the Tag Processor * leaves those invalid inputs untouched, passing them through to the final browser * to handle. 
While this implies that certain operations will be non-spec-compliant, * such as reading the value of an attribute with invalid content, it also preserves a * simplicity and efficiency for handling those error cases. * * Most operations within the Tag Processor are designed to minimize the difference * between an input and output document for any given change. For example, the * `add_class` and `remove_class` methods preserve whitespace and the class ordering * within the `class` attribute; and when encountering tags with duplicated attributes, * the Tag Processor will leave those invalid duplicate attributes where they are but * update the proper attribute which the browser will read for parsing its value. An * exception to this rule is that all attribute updates store their values as * double-quoted strings, meaning that attributes on input with single-quoted or * unquoted values will appear in the output with double-quotes. * * ### Scripting Flag * * The Tag Processor parses HTML with the "scripting flag" disabled. This means * that it doesn't run any scripts while parsing the page. In a browser with * JavaScript enabled, for example, the script can change the parse of the * document as it loads. On the server, however, evaluating JavaScript is not * only impractical, but also unwanted. * * Practically this means that the Tag Processor will descend into NOSCRIPT * elements and process its child tags. Were the scripting flag enabled, such * as in a typical browser, the contents of NOSCRIPT are skipped entirely. * * This allows the HTML API to process the content that will be presented in * a browser when scripting is disabled, but it offers a different view of a * page than most browser sessions will experience. E.g. the tags inside the * NOSCRIPT disappear. 
* * ### Text Encoding * * The Tag Processor assumes that the input HTML document is encoded with a * text encoding compatible with 7-bit ASCII's '<', '>', '&', ';', '/', '=', * "'", '"', 'a' - 'z', 'A' - 'Z', and the whitespace characters ' ', tab, * carriage-return, newline, and form-feed. * * In practice, this includes almost every single-byte encoding as well as * UTF-8. Notably, however, it does not include UTF-16. If providing input * that's incompatible, then convert the encoding beforehand. * * @since 6.2.0 * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive. * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE. * @since 6.5.0 Pauses processor when input ends in an incomplete syntax token. * Introduces "special" elements which act like void elements, e.g. TITLE, STYLE. * Allows scanning through all tokens and processing modifiable text, where applicable. */ class WP_HTML_Tag_Processor { /** * The maximum number of bookmarks allowed to exist at * any given time. * * @since 6.2.0 * @var int * * @see WP_HTML_Tag_Processor::set_bookmark() */ const MAX_BOOKMARKS = 10; /** * Maximum number of times seek() can be called. * Prevents accidental infinite loops. * * @since 6.2.0 * @var int * * @see WP_HTML_Tag_Processor::seek() */ const MAX_SEEK_OPS = 1000; /** * The HTML document to parse. * * @since 6.2.0 * @var string */ protected $html; /** * The last query passed to next_tag(). * * @since 6.2.0 * @var array|null */ private $last_query; /** * The tag name this processor currently scans for. * * @since 6.2.0 * @var string|null */ private $sought_tag_name; /** * The CSS class name this processor currently scans for. * * @since 6.2.0 * @var string|null */ private $sought_class_name; /** * The match offset this processor currently scans for. * * @since 6.2.0 * @var int|null */ private $sought_match_offset; /** * Whether to visit tag closers, e.g.
, when walking an input document. * * @since 6.2.0 * @var bool */ private $stop_on_tag_closers; /** * Specifies mode of operation of the parser at any given time. * * | State | Meaning | * | ----------------|----------------------------------------------------------------------| * | *Ready* | The parser is ready to run. | * | *Complete* | There is nothing left to parse. | * | *Incomplete* | The HTML ended in the middle of a token; nothing more can be parsed. | * | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. | * | *Text node* | Found a #text node; this is plaintext and modifiable. | * | *CDATA node* | Found a CDATA section; this is modifiable. | * | *Comment* | Found a comment or bogus comment; this is modifiable. | * | *Presumptuous* | Found an empty tag closer: ``. | * | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable. | * * @since 6.5.0 * * @see WP_HTML_Tag_Processor::STATE_READY * @see WP_HTML_Tag_Processor::STATE_COMPLETE * @see WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT * @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG * @see WP_HTML_Tag_Processor::STATE_TEXT_NODE * @see WP_HTML_Tag_Processor::STATE_CDATA_NODE * @see WP_HTML_Tag_Processor::STATE_COMMENT * @see WP_HTML_Tag_Processor::STATE_DOCTYPE * @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG * @see WP_HTML_Tag_Processor::STATE_FUNKY_COMMENT * * @var string */ protected $parser_state = self::STATE_READY; /** * Indicates if the document is in quirks mode or no-quirks mode. * * Impact on HTML parsing: * * - In `NO_QUIRKS_MODE` (also known as "standard mode"): * - CSS class and ID selectors match byte-for-byte (case-sensitively). * - A TABLE start tag `` implicitly closes any open `P` element. * * - In `QUIRKS_MODE`: * - CSS class and ID selectors match match in an ASCII case-insensitive manner. * - A TABLE start tag `
` opens a `TABLE` element as a child of a `P` * element if one is open. * * Quirks and no-quirks mode are thus mostly about styling, but have an impact when * tables are found inside paragraph elements. * * @see self::QUIRKS_MODE * @see self::NO_QUIRKS_MODE * * @since 6.7.0 * * @var string */ protected $compat_mode = self::NO_QUIRKS_MODE; /** * Indicates whether the parser is inside foreign content, * e.g. inside an SVG or MathML element. * * One of 'html', 'svg', or 'math'. * * Several parsing rules change based on whether the parser * is inside foreign content, including whether CDATA sections * are allowed and whether a self-closing flag indicates that * an element has no content. * * @since 6.7.0 * * @var string */ private $parsing_namespace = 'html'; /** * What kind of syntax token became an HTML comment. * * Since there are many ways in which HTML syntax can create an HTML comment, * this indicates which of those caused it. This allows the Tag Processor to * represent more from the original input document than would appear in the DOM. * * @since 6.5.0 * * @var string|null */ protected $comment_type = null; /** * What kind of text the matched text node represents, if it was subdivided. * * @see self::TEXT_IS_NULL_SEQUENCE * @see self::TEXT_IS_WHITESPACE * @see self::TEXT_IS_GENERIC * @see self::subdivide_text_appropriately * * @since 6.7.0 * * @var string */ protected $text_node_classification = self::TEXT_IS_GENERIC; /** * How many bytes from the original HTML document have been read and parsed. * * This value points to the latest byte offset in the input document which * has been already parsed. It is the internal cursor for the Tag Processor * and updates while scanning through the HTML tokens. * * @since 6.2.0 * @var int */ private $bytes_already_parsed = 0; /** * Byte offset in input document where current token starts. * * Example: * *
... * 01234 * - token starts at 0 * * @since 6.5.0 * * @var int|null */ private $token_starts_at; /** * Byte length of current token. * * Example: * *
... * 012345678901234 * - token length is 14 - 0 = 14 * * a is a token. * 0123456789 123456789 123456789 * - token length is 17 - 2 = 15 * * @since 6.5.0 * * @var int|null */ private $token_length; /** * Byte offset in input document where current tag name starts. * * Example: * *
... * 01234 * - tag name starts at 1 * * @since 6.2.0 * * @var int|null */ private $tag_name_starts_at; /** * Byte length of current tag name. * * Example: * *
... * 01234 * --- tag name length is 3 * * @since 6.2.0 * * @var int|null */ private $tag_name_length; /** * Byte offset into input document where current modifiable text starts. * * @since 6.5.0 * * @var int */ private $text_starts_at; /** * Byte length of modifiable text. * * @since 6.5.0 * * @var int */ private $text_length; /** * Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
. * * @var bool */ private $is_closing_tag; /** * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name. * * Example: * * // Supposing the parser is working through this content * // and stops after recognizing the `id` attribute. * //
* // ^ parsing will continue from this point. * $this->attributes = array( * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ) * ); * * // When picking up parsing again, or when asking to find the * // `class` attribute we will continue and add to this array. * $this->attributes = array( * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ), * 'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false ) * ); * * // Note that only the `class` attribute value is stored in the index. * // That's because it is the only value used by this class at the moment. * * @since 6.2.0 * @var WP_HTML_Attribute_Token[] */ private $attributes = array(); /** * Tracks spans of duplicate attributes on a given tag, used for removing * all copies of an attribute when calling `remove_attribute()`. * * @since 6.3.2 * * @var (WP_HTML_Span[])[]|null */ private $duplicate_attributes = null; /** * Which class names to add or remove from a tag. * * These are tracked separately from attribute updates because they are * semantically distinct, whereas this interface exists for the common * case of adding and removing class names while other attributes are * generally modified as with DOM `setAttribute` calls. * * When modifying an HTML document these will eventually be collapsed * into a single `set_attribute( 'class', $changes )` call. * * Example: * * // Add the `wp-block-group` class, remove the `wp-group` class. * $classname_updates = array( * // Indexed by a comparable class name. * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS, * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS * ); * * @since 6.2.0 * @var bool[] */ private $classname_updates = array(); /** * Tracks a semantic location in the original HTML which * shifts with updates as they are applied to the document. 
* * @since 6.2.0 * @var WP_HTML_Span[] */ protected $bookmarks = array(); const ADD_CLASS = true; const REMOVE_CLASS = false; const SKIP_CLASS = null; /** * Lexical replacements to apply to input HTML document. * * "Lexical" in this class refers to the part of this class which * operates on pure text _as text_ and not as HTML. There's a line * between the public interface, with HTML-semantic methods like * `set_attribute` and `add_class`, and an internal state that tracks * text offsets in the input document. * * When higher-level HTML methods are called, those have to transform their * operations (such as setting an attribute's value) into text diffing * operations (such as replacing the sub-string from indices A to B with * some given new string). These text-diffing operations are the lexical * updates. * * As new higher-level methods are added they need to collapse their * operations into these lower-level lexical updates since that's the * Tag Processor's internal language of change. Any code which creates * these lexical updates must ensure that they do not cross HTML syntax * boundaries, however, so these should never be exposed outside of this * class or any classes which intentionally expand its functionality. * * These are enqueued while editing the document instead of being immediately * applied to avoid processing overhead, string allocations, and string * copies when applying many updates to a single document. * * Example: * * // Replace an attribute stored with a new value, indices * // sourced from the lazily-parsed HTML recognizer. * $start = $attributes['src']->start; * $length = $attributes['src']->length; * $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value ); * * // Correspondingly, something like this will appear in this array. 
* $lexical_updates = array(
	 *         WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
	 *     );
	 *
	 * @since 6.2.0
	 * @var WP_HTML_Text_Replacement[]
	 */
	protected $lexical_updates = array();

	/**
	 * Tracks and limits `seek()` calls to prevent accidental infinite loops.
	 *
	 * @since 6.2.0
	 * @var int
	 *
	 * @see WP_HTML_Tag_Processor::seek()
	 */
	protected $seek_count = 0;

	/**
	 * Whether the parser should skip over an immediately-following linefeed
	 * character, as is the case with LISTING, PRE, and TEXTAREA.
	 *
	 * > If the next token is a U+000A LINE FEED (LF) character token, then
	 * > ignore that token and move on to the next one. (Newlines at the start
	 * > of [these] elements are ignored as an authoring convenience.)
	 *
	 * @since 6.7.0
	 *
	 * @var int|null
	 */
	private $skip_newline_at = null;

	/**
	 * Constructor.
	 *
	 * Only stores the input document on the instance; nothing is
	 * scanned until one of the `next_*()` methods is called.
	 *
	 * @since 6.2.0
	 *
	 * @param string $html HTML to process.
	 */
	public function __construct( $html ) {
		$this->html = $html;
	}

	/**
	 * Moves the parser into a different parsing namespace, as happens
	 * when an SVG or MathML element opens and foreign content begins.
	 *
	 * Any name other than 'html', 'math', or 'svg' is rejected and the
	 * current namespace is left as it was.
	 *
	 * @since 6.7.0
	 *
	 * @param string $new_namespace One of 'html', 'svg', or 'math' indicating into what
	 *                              namespace the next tokens will be processed.
	 * @return bool Whether the namespace was valid and changed.
	 */
	public function change_parsing_namespace( string $new_namespace ): bool {
		$is_supported = in_array( $new_namespace, array( 'html', 'math', 'svg' ), true );

		if ( $is_supported ) {
			$this->parsing_namespace = $new_namespace;
		}

		return $is_supported;
	}

	/**
	 * Finds the next tag matching the $query.
	 *
	 * @since 6.2.0
	 * @since 6.5.0 No longer processes incomplete tokens at end of document; pauses the processor at start of token.
	 *
	 * @param array|string|null $query {
	 *     Optional. Which tag name to find, having which class, etc. Default is to find any tag.
	 *
	 *     @type string|null $tag_name     Which tag to find, or `null` for "any tag."
	 *     @type int|null    $match_offset Find the Nth tag matching all search criteria.
* 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this whole class name to match. * @type string|null $tag_closers "visit" or "skip": whether to stop on tag closers, e.g.
*                                     `</div>`.
	 * }
	 * @return bool Whether a tag was matched.
	 */
	public function next_tag( $query = null ): bool {
		$this->parse_query( $query );
		$matches_seen = 0;

		do {
			// Running out of tokens (or pausing on incomplete input) ends the search.
			if ( false === $this->next_token() ) {
				return false;
			}

			// Only tag tokens can satisfy the query; every other token is stepped over.
			if ( self::STATE_MATCHED_TAG === $this->parser_state && $this->matches() ) {
				++$matches_seen;
			}
		} while ( $matches_seen < $this->sought_match_offset );

		return true;
	}

	/**
	 * Advances to the next token in the HTML document.
	 *
	 * An HTML document can be read as a stream of tokens: HTML tags,
	 * HTML comments, text nodes, and so on. This method moves onto the
	 * next token in the document and reports whether it found one.
	 *
	 * If the document ends part-way through a token, the processor
	 * returns to the start of that token and pauses, returning `false`
	 * to indicate that no complete token was found.
	 *
	 * Possible token types, based on the HTML specification:
	 *
	 *  - an HTML tag, whether opening, closing, or void.
	 *  - a text node - the plaintext inside tags.
	 *  - an HTML comment.
	 *  - a DOCTYPE declaration.
	 *  - a processing instruction, e.g. `<?xml version="1.0" ?>`.
	 *
	 * Use `next_tag()` instead to stop only on tags.
	 *
	 * @since 6.5.0
	 * @since 6.7.0 Recognizes CDATA sections within foreign content.
	 *
	 * @return bool Whether a token was parsed.
	 */
	public function next_token(): bool {
		return $this->base_class_next_token();
	}

	/**
	 * Internal method which finds the next token in the HTML document.
	 *
	 * This private method holds the actual logic for finding the next token
	 * in a document. It is kept separate from `next_token()` so that the parser
	 * can update its own state without affecting the location of the cursor in
	 * the document and without triggering subclass methods for things like
	 * `next_token()`, e.g. when applying patches before searching for the next token.
	 *
	 * @since 6.5.0
	 *
	 * @access private
	 *
	 * @return bool Whether a token was parsed.
*/ private function base_class_next_token(): bool { $was_at = $this->bytes_already_parsed; $this->after_tag(); // Don't proceed if there's nothing more to scan. if ( self::STATE_COMPLETE === $this->parser_state || self::STATE_INCOMPLETE_INPUT === $this->parser_state ) { return false; } /* * The next step in the parsing loop determines the parsing state; * clear it so that state doesn't linger from the previous step. */ $this->parser_state = self::STATE_READY; if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { $this->parser_state = self::STATE_COMPLETE; return false; } // Find the next tag if it exists. if ( false === $this->parse_next_tag() ) { if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) { $this->bytes_already_parsed = $was_at; } return false; } /* * For legacy reasons the rest of this function handles tags and their * attributes. If the processor has reached the end of the document * or if it matched any other token then it should return here to avoid * attempting to process tag-specific syntax. */ if ( self::STATE_INCOMPLETE_INPUT !== $this->parser_state && self::STATE_COMPLETE !== $this->parser_state && self::STATE_MATCHED_TAG !== $this->parser_state ) { return true; } // Parse all of its attributes. while ( $this->parse_next_attribute() ) { continue; } // Ensure that the tag closes before the end of the document. if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state || $this->bytes_already_parsed >= strlen( $this->html ) ) { // Does this appropriately clear state (parsed attributes)? 
$this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->bytes_already_parsed = $was_at; return false; } $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed ); if ( false === $tag_ends_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->bytes_already_parsed = $was_at; return false; } $this->parser_state = self::STATE_MATCHED_TAG; $this->bytes_already_parsed = $tag_ends_at + 1; $this->token_length = $this->bytes_already_parsed - $this->token_starts_at; /* * Certain tags require additional processing. The first-letter pre-check * avoids unnecessary string allocation when comparing the tag names. * * - IFRAME * - LISTING (deprecated) * - NOEMBED (deprecated) * - NOFRAMES (deprecated) * - PRE * - SCRIPT * - STYLE * - TEXTAREA * - TITLE * - XMP (deprecated) */ if ( $this->is_closing_tag || 'html' !== $this->parsing_namespace || 1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 ) ) { return true; } $tag_name = $this->get_tag(); /* * For LISTING, PRE, and TEXTAREA, the first linefeed of an immediately-following * text node is ignored as an authoring convenience. * * @see static::skip_newline_at */ if ( 'LISTING' === $tag_name || 'PRE' === $tag_name ) { $this->skip_newline_at = $this->bytes_already_parsed; return true; } /* * There are certain elements whose children are not DATA but are instead * RCDATA or RAWTEXT. These cannot contain other elements, and the contents * are parsed as plaintext, with character references decoded in RCDATA but * not in RAWTEXT. * * These elements are described here as "self-contained" or special atomic * elements whose end tag is consumed with the opening tag, and they will * contain modifiable text inside of them. * * Preserve the opening tag pointers, as these will be overwritten * when finding the closing tag. They will be reset after finding * the closing to tag to point to the opening of the special atomic * tag sequence. 
*/ $tag_name_starts_at = $this->tag_name_starts_at; $tag_name_length = $this->tag_name_length; $tag_ends_at = $this->token_starts_at + $this->token_length; $attributes = $this->attributes; $duplicate_attributes = $this->duplicate_attributes; // Find the closing tag if necessary. switch ( $tag_name ) { case 'SCRIPT': $found_closer = $this->skip_script_data(); break; case 'TEXTAREA': case 'TITLE': $found_closer = $this->skip_rcdata( $tag_name ); break; /* * In the browser this list would include the NOSCRIPT element, * but the Tag Processor is an environment with the scripting * flag disabled, meaning that it needs to descend into the * NOSCRIPT element to be able to properly process what will be * sent to a browser. * * Note that this rule makes HTML5 syntax incompatible with XML, * because the parsing of this token depends on client application. * The NOSCRIPT element cannot be represented in the XHTML syntax. */ case 'IFRAME': case 'NOEMBED': case 'NOFRAMES': case 'STYLE': case 'XMP': $found_closer = $this->skip_rawtext( $tag_name ); break; // No other tags should be treated in their entirety here. default: return true; } if ( ! $found_closer ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->bytes_already_parsed = $was_at; return false; } /* * The values here look like they reference the opening tag but they reference * the closing tag instead. This is why the opening tag values were stored * above in a variable. It reads confusingly here, but that's because the * functions that skip the contents have moved all the internal cursors past * the inner content of the tag. 
*/
		$this->token_starts_at      = $was_at;
		$this->token_length         = $this->bytes_already_parsed - $this->token_starts_at;
		$this->text_starts_at       = $tag_ends_at;
		$this->text_length          = $this->tag_name_starts_at - $this->text_starts_at;
		$this->tag_name_starts_at   = $tag_name_starts_at;
		$this->tag_name_length      = $tag_name_length;
		$this->attributes           = $attributes;
		$this->duplicate_attributes = $duplicate_attributes;

		return true;
	}

	/**
	 * Generator producing the distinct class names of the currently-matched tag.
	 *
	 * Nothing is produced unless the processor is stopped on a tag and the value
	 * read for its `class` attribute is a string. That value is split on HTML
	 * whitespace (space, tab, form feed, carriage return, newline); inside each
	 * name a NULL byte is replaced with U+FFFD, and in quirks mode the name is
	 * lower-cased. A name which has already been produced is not repeated.
	 *
	 * Example:
	 *
	 *     $p = new WP_HTML_Tag_Processor( '<div class="free lang-en free">' );
	 *     $p->next_tag();
	 *     foreach ( $p->class_list() as $class_name ) {
	 *         echo "{$class_name} ";
	 *     }
	 *     // Outputs: "free lang-en "
	 *
	 * NOTE(review): in this copy of the file the docblock found here was fused
	 * with the description of another member ("Whether the processor paused
	 * because the input HTML document ended in the middle of a syntax element,
	 * such as in the middle of a tag.") and the HTML of its example was lost.
	 * Confirm against a pristine copy of the file.
	 *
	 * @since 6.4.0
	 */
	public function class_list() {
		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
			return;
		}

		/** @var string $class_attribute Value of the class attribute as returned by get_attribute(). */
		$class_attribute = $this->get_attribute( 'class' );
		if ( ! is_string( $class_attribute ) ) {
			return;
		}

		$lowercase_names = self::QUIRKS_MODE === $this->compat_mode;
		$already_yielded = array();
		$total_bytes     = strlen( $class_attribute );
		$offset          = 0;

		while ( $offset < $total_bytes ) {
			// Step over the separator run in front of the next name.
			$offset += strspn( $class_attribute, " \t\f\r\n", $offset );
			if ( $offset >= $total_bytes ) {
				return;
			}

			// The name extends up to the following separator.
			$name_bytes = strcspn( $class_attribute, " \t\f\r\n", $offset );
			if ( 0 === $name_bytes ) {
				return;
			}

			$raw_name = substr( $class_attribute, $offset, $name_bytes );
			$offset  += $name_bytes;

			$class_name = str_replace( "\x00", "\u{FFFD}", $raw_name );
			if ( $lowercase_names ) {
				$class_name = strtolower( $class_name );
			}

			/*
			 * Tags are expected to carry few class names, so a linear scan
			 * of the names produced so far is used instead of keyed lookup.
			 */
			if ( ! in_array( $class_name, $already_yielded, true ) ) {
				$already_yielded[] = $class_name;
				yield $class_name;
			}
		}
	}

	/**
	 * Returns if a matched tag contains the given ASCII case-insensitive class name.
*
	 * @since 6.4.0
	 *
	 * @param string $wanted_class Look for this CSS class name; compared ASCII case-insensitively
	 *                             when the processor is in quirks mode and byte-for-byte otherwise.
	 * @return bool|null Whether the matched tag contains the given class name, or null if not matched.
	 */
	public function has_class( $wanted_class ): ?bool {
		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
			return null;
		}

		$ignore_case = self::QUIRKS_MODE === $this->compat_mode;
		$byte_count  = strlen( $wanted_class );

		foreach ( $this->class_list() as $candidate ) {
			// Names of a different byte length can never be equal.
			if ( strlen( $candidate ) !== $byte_count ) {
				continue;
			}

			if ( 0 === substr_compare( $candidate, $wanted_class, 0, $byte_count, $ignore_case ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Sets a bookmark in the HTML document.
	 *
	 * Bookmarks represent specific places or tokens in the HTML
	 * document, such as a tag opener or closer. When applying
	 * edits to a document, such as setting an attribute, the
	 * text offsets of that token may shift; the bookmark is
	 * kept updated with those shifts and remains stable unless
	 * the entire span of text in which the token sits is removed.
	 *
	 * Release bookmarks when they are no longer needed.
	 *
	 * Example:
	 *
	 *

    <main><h2>Surprising fact you may not know!</h2></main>

* ^ ^ * \-|-- this `H2` opener bookmark tracks the token * *

    <main class="clickbait"><h2>Surprising fact you may no…
	 *                             ^  ^
	 *                              \-|-- it shifts with edits
	 *
	 * Bookmarks provide the ability to seek to a previously-scanned
	 * place in the HTML document. This avoids the need to re-scan
	 * the entire document.
	 *
	 * Example:
	 *
	 *
    <ul><li>One</li><li>Two</li><li>Three</li></ul>
*                                 ^^^^
	 *                                 want to note this last item
	 *
	 *     $p = new WP_HTML_Tag_Processor( $html );
	 *     $in_list = false;
	 *     while ( $p->next_tag( array( 'tag_closers' => $in_list ? 'visit' : 'skip' ) ) ) {
	 *         if ( 'UL' === $p->get_tag() ) {
	 *             if ( $p->is_tag_closer() ) {
	 *                 $in_list = false;
	 *                 $p->set_bookmark( 'resume' );
	 *                 if ( $p->seek( 'last-li' ) ) {
	 *                     $p->add_class( 'last-li' );
	 *                 }
	 *                 $p->seek( 'resume' );
	 *                 $p->release_bookmark( 'last-li' );
	 *                 $p->release_bookmark( 'resume' );
	 *             } else {
	 *                 $in_list = true;
	 *             }
	 *         }
	 *
	 *         if ( 'LI' === $p->get_tag() ) {
	 *             $p->set_bookmark( 'last-li' );
	 *         }
	 *     }
	 *
	 * Bookmarks intentionally hide the internal string offsets
	 * to which they refer. They are maintained internally as
	 * updates are applied to the HTML document and therefore
	 * retain their "position" - the location to which they
	 * originally pointed. The inability to use bookmarks with
	 * functions like `substr` is therefore intentional to guard
	 * against accidentally breaking the HTML.
	 *
	 * Because bookmarks allocate memory and require processing
	 * for every applied update, they are limited and require
	 * a name. They should not be created with programmatically-made
	 * names, such as "li_{$index}" with some loop. As a general
	 * rule they should only be created with string-literal names
	 * like "start-of-section" or "last-paragraph".
	 *
	 * Bookmarks are a powerful tool to enable complicated behavior.
	 * Consider double-checking that you need this tool if you are
	 * reaching for it, as inappropriate use could lead to broken
	 * HTML structure or unwanted processing overhead.
	 *
	 * @since 6.2.0
	 *
	 * @param string $name Identifies this particular bookmark.
	 * @return bool Whether the bookmark was successfully created.
	 */
	public function set_bookmark( $name ): bool {
		// Without a concrete token under the cursor there is nothing to point a bookmark at.
		$no_current_token = in_array(
			$this->parser_state,
			array( self::STATE_COMPLETE, self::STATE_INCOMPLETE_INPUT ),
			true
		);
		if ( $no_current_token ) {
			return false;
		}

		// Re-pointing an existing name does not count against the limit; only new names do.
		$replaces_existing = array_key_exists( $name, $this->bookmarks );
		if ( ! $replaces_existing && count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
			_doing_it_wrong(
				__METHOD__,
				__( 'Too many bookmarks: cannot create any more.' ),
				'6.2.0'
			);
			return false;
		}

		$current_token_span       = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
		$this->bookmarks[ $name ] = $current_token_span;

		return true;
	}

	/**
	 * Forgets a bookmark once it has served its purpose.
	 *
	 * Dropping a bookmark removes the small bookkeeping
	 * cost which comes with keeping it around.
	 *
	 * @param string $name Name of the bookmark to remove.
	 * @return bool Whether the bookmark already existed before removal.
	 */
	public function release_bookmark( $name ): bool {
		$existed = array_key_exists( $name, $this->bookmarks );

		if ( $existed ) {
			unset( $this->bookmarks[ $name ] );
		}

		return $existed;
	}

	/**
	 * Moves past the contents of a generic RAWTEXT element.
	 *
	 * @since 6.3.2
	 *
	 * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
	 *
	 * @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
	 * @return bool Whether an end to the RAWTEXT region was found before the end of the document.
	 */
	private function skip_rawtext( string $tag_name ): bool {
		/*
		 * RAWTEXT and RCDATA differ only in whether character references
		 * get decoded. Reading the inner markup isn't supported, so the
		 * RCDATA scanner is reused instead of keeping a second copy.
		 */
		$found_closer = $this->skip_rcdata( $tag_name );

		return $found_closer;
	}

	/**
	 * Skips contents of RCDATA elements, namely title and textarea tags.
	 *
	 * @since 6.2.0
	 *
	 * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
	 *
	 * @param string $tag_name The uppercase tag name which will close the RCDATA region.
	 * @return bool Whether an end to the RCDATA region was found before the end of the document.
*/ private function skip_rcdata( string $tag_name ): bool { $html = $this->html; $doc_length = strlen( $html ); $tag_length = strlen( $tag_name ); $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { $at = strpos( $this->html, 'tag_name_starts_at = $at; // Fail if there is no possible tag closer. if ( false === $at || ( $at + $tag_length ) >= $doc_length ) { return false; } $at += 2; /* * Find a case-insensitive match to the tag name. * * Because tag names are limited to US-ASCII there is no * need to perform any kind of Unicode normalization when * comparing; any character which could be impacted by such * normalization could not be part of a tag name. */ for ( $i = 0; $i < $tag_length; $i++ ) { $tag_char = $tag_name[ $i ]; $html_char = $html[ $at + $i ]; if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { $at += $i; continue 2; } } $at += $tag_length; $this->bytes_already_parsed = $at; if ( $at >= strlen( $html ) ) { return false; } /* * Ensure that the tag name terminates to avoid matching on * substrings of a longer tag name. For example, the sequence * "' !== $c ) { continue; } while ( $this->parse_next_attribute() ) { continue; } $at = $this->bytes_already_parsed; if ( $at >= strlen( $this->html ) ) { return false; } if ( '>' === $html[ $at ] ) { $this->bytes_already_parsed = $at + 1; return true; } if ( $at + 1 >= strlen( $this->html ) ) { return false; } if ( '/' === $html[ $at ] && '>' === $html[ $at + 1 ] ) { $this->bytes_already_parsed = $at + 2; return true; } } return false; } /** * Skips contents of script tags. * * @since 6.2.0 * * @return bool Whether the script tag was closed before the end of the document. 
*/ private function skip_script_data(): bool { $state = 'unescaped'; $html = $this->html; $doc_length = strlen( $html ); $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { $at += strcspn( $html, '-<', $at ); /* * For all script states a "-->" transitions * back into the normal unescaped script mode, * even if that's the current state. */ if ( $at + 2 < $doc_length && '-' === $html[ $at ] && '-' === $html[ $at + 1 ] && '>' === $html[ $at + 2 ] ) { $at += 3; $state = 'unescaped'; continue; } if ( $at + 1 >= $doc_length ) { return false; } /* * Everything of interest past here starts with "<". * Check this character and advance position regardless. */ if ( '<' !== $html[ $at++ ] ) { continue; } /* * Unlike with "-->", the "`. Unlike other comment * and bogus comment syntax, these leave no clear insertion point for text and * they need to be modified specially in order to contain text. E.g. to store * `?` as the modifiable text, the `` needs to become ``, which * involves inserting an additional `-` into the token after the modifiable text. */ $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT; $this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at; // Only provide modifiable text if the token is long enough to contain it. if ( $span_of_dashes >= 2 ) { $this->comment_type = self::COMMENT_AS_HTML_COMMENT; $this->text_starts_at = $this->token_starts_at + 4; $this->text_length = $span_of_dashes - 2; } $this->bytes_already_parsed = $closer_at + $span_of_dashes + 1; return true; } /* * Comments may be closed by either a --> or an invalid --!>. * The first occurrence closes the comment. * * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment */ --$closer_at; // Pre-increment inside condition below reduces risk of accidental infinite looping. 
while ( ++$closer_at < $doc_length ) { $closer_at = strpos( $html, '--', $closer_at ); if ( false === $closer_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } if ( $closer_at + 2 < $doc_length && '>' === $html[ $closer_at + 2 ] ) { $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_HTML_COMMENT; $this->token_length = $closer_at + 3 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 4; $this->text_length = $closer_at - $this->text_starts_at; $this->bytes_already_parsed = $closer_at + 3; return true; } if ( $closer_at + 3 < $doc_length && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_HTML_COMMENT; $this->token_length = $closer_at + 4 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 4; $this->text_length = $closer_at - $this->text_starts_at; $this->bytes_already_parsed = $closer_at + 4; return true; } } } /* * ` * These are ASCII-case-insensitive. 
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( $doc_length > $at + 8 && ( 'D' === $html[ $at + 2 ] || 'd' === $html[ $at + 2 ] ) && ( 'O' === $html[ $at + 3 ] || 'o' === $html[ $at + 3 ] ) && ( 'C' === $html[ $at + 4 ] || 'c' === $html[ $at + 4 ] ) && ( 'T' === $html[ $at + 5 ] || 't' === $html[ $at + 5 ] ) && ( 'Y' === $html[ $at + 6 ] || 'y' === $html[ $at + 6 ] ) && ( 'P' === $html[ $at + 7 ] || 'p' === $html[ $at + 7 ] ) && ( 'E' === $html[ $at + 8 ] || 'e' === $html[ $at + 8 ] ) ) { $closer_at = strpos( $html, '>', $at + 9 ); if ( false === $closer_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $this->parser_state = self::STATE_DOCTYPE; $this->token_length = $closer_at + 1 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 9; $this->text_length = $closer_at - $this->text_starts_at; $this->bytes_already_parsed = $closer_at + 1; return true; } if ( 'html' !== $this->parsing_namespace && strlen( $html ) > $at + 8 && '[' === $html[ $at + 2 ] && 'C' === $html[ $at + 3 ] && 'D' === $html[ $at + 4 ] && 'A' === $html[ $at + 5 ] && 'T' === $html[ $at + 6 ] && 'A' === $html[ $at + 7 ] && '[' === $html[ $at + 8 ] ) { $closer_at = strpos( $html, ']]>', $at + 9 ); if ( false === $closer_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $this->parser_state = self::STATE_CDATA_NODE; $this->text_starts_at = $at + 9; $this->text_length = $closer_at - $this->text_starts_at; $this->token_length = $closer_at + 3 - $this->token_starts_at; $this->bytes_already_parsed = $closer_at + 3; return true; } /* * Anything else here is an incorrectly-opened comment and transitions * to the bogus comment state - skip to the nearest >. If no closer is * found then the HTML was truncated inside the markup declaration. 
*/ $closer_at = strpos( $html, '>', $at + 1 ); if ( false === $closer_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_INVALID_HTML; $this->token_length = $closer_at + 1 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 2; $this->text_length = $closer_at - $this->text_starts_at; $this->bytes_already_parsed = $closer_at + 1; /* * Identify nodes that would be CDATA if HTML had CDATA sections. * * This section must occur after identifying the bogus comment end * because in an HTML parser it will span to the nearest `>`, even * if there's no `]]>` as would be required in an XML document. It * is therefore not possible to parse a CDATA section containing * a `>` in the HTML syntax. * * Inside foreign elements there is a discrepancy between browsers * and the specification on this. * * @todo Track whether the Tag Processor is inside a foreign element * and require the proper closing `]]>` in those cases. */ if ( $this->token_length >= 10 && '[' === $html[ $this->token_starts_at + 2 ] && 'C' === $html[ $this->token_starts_at + 3 ] && 'D' === $html[ $this->token_starts_at + 4 ] && 'A' === $html[ $this->token_starts_at + 5 ] && 'T' === $html[ $this->token_starts_at + 6 ] && 'A' === $html[ $this->token_starts_at + 7 ] && '[' === $html[ $this->token_starts_at + 8 ] && ']' === $html[ $closer_at - 1 ] && ']' === $html[ $closer_at - 2 ] ) { $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_CDATA_LOOKALIKE; $this->text_starts_at += 7; $this->text_length -= 9; } return true; } /* * is a missing end tag name, which is ignored. * * This was also known as the "presumptuous empty tag" * in early discussions as it was proposed to close * the nearest previous opening tag. * * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name */ if ( '>' === $html[ $at + 1 ] ) { // `<>` is interpreted as plaintext. if ( ! 
$this->is_closing_tag ) { ++$at; continue; } $this->parser_state = self::STATE_PRESUMPTUOUS_TAG; $this->token_length = $at + 2 - $this->token_starts_at; $this->bytes_already_parsed = $at + 2; return true; } /* * ` * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( ! $this->is_closing_tag && '?' === $html[ $at + 1 ] ) { $closer_at = strpos( $html, '>', $at + 2 ); if ( false === $closer_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $this->parser_state = self::STATE_COMMENT; $this->comment_type = self::COMMENT_AS_INVALID_HTML; $this->token_length = $closer_at + 1 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 2; $this->text_length = $closer_at - $this->text_starts_at; $this->bytes_already_parsed = $closer_at + 1; /* * Identify a Processing Instruction node were HTML to have them. * * This section must occur after identifying the bogus comment end * because in an HTML parser it will span to the nearest `>`, even * if there's no `?>` as would be required in an XML document. It * is therefore not possible to parse a Processing Instruction node * containing a `>` in the HTML syntax. * * XML allows for more target names, but this code only identifies * those with ASCII-representable target names. This means that it * may identify some Processing Instruction nodes as bogus comments, * but it will not misinterpret the HTML structure. By limiting the * identification to these target names the Tag Processor can avoid * the need to start parsing UTF-8 sequences. * * > NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | * [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | * [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | * [#x10000-#xEFFFF] * > NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] * * @todo Processing instruction nodes in SGML may contain any kind of markup. 
XML defines a * special case with `` syntax, but the `?` is part of the bogus comment. * * @see https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget */ if ( $this->token_length >= 5 && '?' === $html[ $closer_at - 1 ] ) { $comment_text = substr( $html, $this->token_starts_at + 2, $this->token_length - 4 ); $pi_target_length = strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:_' ); if ( 0 < $pi_target_length ) { $pi_target_length += strspn( $comment_text, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:_-.', $pi_target_length ); $this->comment_type = self::COMMENT_AS_PI_NODE_LOOKALIKE; $this->tag_name_starts_at = $this->token_starts_at + 2; $this->tag_name_length = $pi_target_length; $this->text_starts_at += $pi_target_length; $this->text_length -= $pi_target_length + 1; } } return true; } /* * If a non-alpha starts the tag name in a tag closer it's a comment. * Find the first `>`, which closes the comment. * * This parser classifies these particular comments as special "funky comments" * which are made available for further processing. * * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name */ if ( $this->is_closing_tag ) { // No chance of finding a closer. if ( $at + 3 > $doc_length ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $closer_at = strpos( $html, '>', $at + 2 ); if ( false === $closer_at ) { $this->parser_state = self::STATE_INCOMPLETE_INPUT; return false; } $this->parser_state = self::STATE_FUNKY_COMMENT; $this->token_length = $closer_at + 1 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 2; $this->text_length = $closer_at - $this->text_starts_at; $this->bytes_already_parsed = $closer_at + 1; return true; } ++$at; } /* * This does not imply an incomplete parse; it indicates that there * can be nothing left in the document other than a #text node. 
*/
		$this->parser_state         = self::STATE_TEXT_NODE;
		$this->token_starts_at      = $was_at;
		$this->token_length         = $doc_length - $was_at;
		$this->text_starts_at       = $was_at;
		$this->text_length          = $this->token_length;
		$this->bytes_already_parsed = $doc_length;
		return true;
	}

	/**
	 * Parses the next attribute.
	 *
	 * Starting at the internal cursor this skips whitespace and slashes, reads an
	 * attribute name and, when an `=` follows, its quoted or unquoted value, and
	 * leaves the cursor after what was consumed. For tag openers the attribute is
	 * recorded in `$this->attributes` under its lower-cased name; repeated names
	 * are recorded in `$this->duplicate_attributes` instead. On tag closers the
	 * attribute is consumed but not recorded.
	 *
	 * When the document ends before the attribute does, the parser state is set
	 * to `STATE_INCOMPLETE_INPUT`. The one `false` return which does not touch the
	 * parser state is the "no attribute, just tag closer" check below.
	 *
	 * @since 6.2.0
	 *
	 * @return bool Whether an attribute was found before the end of the document.
	 */
	private function parse_next_attribute(): bool {
		$doc_length = strlen( $this->html );

		// Skip whitespace and slashes.
		$this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
		if ( $this->bytes_already_parsed >= $doc_length ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		/*
		 * Treat the equal sign as a part of the attribute
		 * name if it is the first encountered byte.
		 *
		 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
		 */
		$name_length = '=' === $this->html[ $this->bytes_already_parsed ]
			? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
			: strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed );

		/*
		 * No attribute, just tag closer.
		 *
		 * This also returns when the name runs up to the very end of the
		 * document. The cursor is left where it was and the parser state
		 * is not changed here.
		 */
		if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= $doc_length ) {
			return false;
		}

		$attribute_start             = $this->bytes_already_parsed;
		$attribute_name              = substr( $this->html, $attribute_start, $name_length );
		$this->bytes_already_parsed += $name_length;
		/*
		 * NOTE(review): this looks unreachable. The check above already returned
		 * unless the cursor plus the name length is below the document length,
		 * and that sum is exactly the value the cursor holds here.
		 */
		if ( $this->bytes_already_parsed >= $doc_length ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		$this->skip_whitespace();
		if ( $this->bytes_already_parsed >= $doc_length ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		$has_value = '=' === $this->html[ $this->bytes_already_parsed ];
		if ( $has_value ) {
			// Step over the `=` and any whitespace in front of the value.
			++$this->bytes_already_parsed;
			$this->skip_whitespace();
			if ( $this->bytes_already_parsed >= $doc_length ) {
				$this->parser_state = self::STATE_INCOMPLETE_INPUT;

				return false;
			}

			switch ( $this->html[ $this->bytes_already_parsed ] ) {
				// Quoted value: it spans up to the next occurrence of the same quote.
				case "'":
				case '"':
					$quote       = $this->html[ $this->bytes_already_parsed ];
					$value_start = $this->bytes_already_parsed + 1;
					$end_quote_at = strpos( $this->html, $quote, $value_start );
					// A missing end quote pushes the end past the document, which is caught below.
					$end_quote_at               = false === $end_quote_at ? $doc_length : $end_quote_at;
					$value_length               = $end_quote_at - $value_start;
					$attribute_end              = $end_quote_at + 1;
					$this->bytes_already_parsed = $attribute_end;
					break;

				// Unquoted value: it spans up to the next whitespace or `>`.
				default:
					$value_start                = $this->bytes_already_parsed;
					$value_length               = strcspn( $this->html, "> \t\f\r\n", $value_start );
					$attribute_end              = $value_start + $value_length;
					$this->bytes_already_parsed = $attribute_end;
			}
		} else {
			// No value: the attribute span covers the name only.
			$value_start   = $this->bytes_already_parsed;
			$value_length  = 0;
			$attribute_end = $attribute_start + $name_length;
		}

		// An attribute reaching the end of the document leaves no room for the tag to close.
		if ( $attribute_end >= $doc_length ) {
			$this->parser_state = self::STATE_INCOMPLETE_INPUT;

			return false;
		}

		// Attributes on tag closers are consumed but never recorded.
		if ( $this->is_closing_tag ) {
			return true;
		}

		/*
		 * > There must never be two or more attributes on
		 * > the same start tag whose names are an ASCII
		 * > case-insensitive match for each other.
		 *     - HTML 5 spec
		 *
		 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
		 */
		$comparable_name = strtolower( $attribute_name );

		// If an attribute is listed many times, only use the first declaration and ignore the rest.
		if ( ! isset( $this->attributes[ $comparable_name ] ) ) {
			$this->attributes[ $comparable_name ] = new WP_HTML_Attribute_Token(
				$attribute_name,
				$value_start,
				$value_length,
				$attribute_start,
				$attribute_end - $attribute_start,
				! $has_value
			);

			return true;
		}

		/*
		 * Track the duplicate attributes so if we remove it, all disappear together.
		 *
		 * While `$this->duplicated_attributes` could always be stored as an `array()`,
		 * which would simplify the logic here, storing a `null` and only allocating
		 * an array when encountering duplicates avoids needless allocations in the
		 * normative case of parsing tags with no duplicate attributes.
		 */
		$duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end - $attribute_start );
		if ( null === $this->duplicate_attributes ) {
			$this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) );
		} elseif ( ! isset( $this->duplicate_attributes[ $comparable_name ] ) ) {
			$this->duplicate_attributes[ $comparable_name ] = array( $duplicate_span );
		} else {
			$this->duplicate_attributes[ $comparable_name ][] = $duplicate_span;
		}

		return true;
	}

	/**
	 * Move the internal cursor past any immediate successive whitespace.
	 *
	 * Whitespace here means space, tab, form feed, carriage return, and newline.
	 * The cursor may end up at the end of the document; callers check for that.
	 *
	 * @since 6.2.0
	 */
	private function skip_whitespace(): void {
		$this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n", $this->bytes_already_parsed );
	}

	/**
	 * Applies attribute updates and cleans up once a tag is fully parsed.
	 *
	 * Pending class name updates are turned into attribute updates, queued
	 * lexical updates are flushed or re-keyed as described below, and all
	 * per-token properties are reset.
	 *
	 * @since 6.2.0
	 */
	private function after_tag(): void {
		/*
		 * There could be lexical updates enqueued for an attribute that
		 * also exists on the next tag. In order to avoid conflating the
		 * attributes across the two tags, lexical updates with names
		 * need to be flushed to raw lexical updates.
		 */
		$this->class_name_updates_to_attributes_updates();

		/*
		 * Purge updates if there are too many. The actual count isn't
		 * scientific, but a few values from 100 to a few thousand were
		 * tests to find a practically-useful limit.
		 *
		 * If the update queue grows too big, then the Tag Processor
		 * will spend more time iterating through them and lose the
		 * efficiency gains of deferring applying them.
		 */
		if ( 1000 < count( $this->lexical_updates ) ) {
			$this->get_updated_html();
		}

		foreach ( $this->lexical_updates as $name => $update ) {
			/*
			 * Any updates appearing after the cursor should be applied
			 * before proceeding, otherwise they may be overlooked.
			 */
			if ( $update->start >= $this->bytes_already_parsed ) {
				$this->get_updated_html();
				break;
			}

			// Entries under integer keys are left as they are.
			if ( is_int( $name ) ) {
				continue;
			}

			// Re-append the named update under an integer key and drop its named entry.
			$this->lexical_updates[] = $update;
			unset( $this->lexical_updates[ $name ] );
		}

		// Reset everything which described the token that was just left.
		$this->token_starts_at          = null;
		$this->token_length             = null;
		$this->tag_name_starts_at       = null;
		$this->tag_name_length          = null;
		$this->text_starts_at           = 0;
		$this->text_length              = 0;
		$this->is_closing_tag           = null;
		$this->attributes               = array();
		$this->comment_type             = null;
		$this->text_node_classification = self::TEXT_IS_GENERIC;
		$this->duplicate_attributes     = null;
	}

	/**
	 * Converts class name updates into tag attributes updates
	 * (they are accumulated in different data formats for performance).
*
	 * The enqueued class names are the keys of `$this->classname_updates`. PHP
	 * stores array keys which look like decimal integers (e.g. "123") as `int`,
	 * so every key is cast back to a string before it is compared strictly with
	 * the names read from the existing `class` attribute. Without that cast a
	 * numeric class name would never match: it could not be removed and would
	 * be added a second time even when already present.
	 *
	 * @since 6.2.0
	 *
	 * @see WP_HTML_Tag_Processor::$lexical_updates
	 * @see WP_HTML_Tag_Processor::$classname_updates
	 */
	private function class_name_updates_to_attributes_updates(): void {
		if ( count( $this->classname_updates ) === 0 ) {
			return;
		}

		// Prefer an enqueued value for `class`, then the value in the document, then nothing.
		$existing_class = $this->get_enqueued_attribute_value( 'class' );
		if ( null === $existing_class || true === $existing_class ) {
			$existing_class = '';
		}

		if ( false === $existing_class && isset( $this->attributes['class'] ) ) {
			$existing_class = substr(
				$this->html,
				$this->attributes['class']->value_starts_at,
				$this->attributes['class']->value_length
			);
		}

		if ( false === $existing_class ) {
			$existing_class = '';
		}

		/**
		 * Updated "class" attribute value.
		 *
		 * This is incrementally built while scanning through the existing class
		 * attribute, skipping removed classes on the way, and then appending
		 * added classes at the end. Only when finished processing will the
		 * value contain the final new value.

		 * @var string $class
		 */
		$class = '';

		/**
		 * Tracks the cursor position in the existing
		 * class attribute value while parsing.
		 *
		 * @var int $at
		 */
		$at = 0;

		/**
		 * Indicates if there's any need to modify the existing class attribute.
		 *
		 * If a call to `add_class()` and `remove_class()` wouldn't impact
		 * the `class` attribute value then there's no need to rebuild it.
		 * For example, when adding a class that's already present or
		 * removing one that isn't.
		 *
		 * This flag enables a performance optimization when none of the enqueued
		 * class updates would impact the `class` attribute; namely, that the
		 * processor can continue without modifying the input document, as if
		 * none of the `add_class()` or `remove_class()` calls had been made.
		 *
		 * This flag is set upon the first change that requires a string update.
		 *
		 * @var bool $modified
		 */
		$modified = false;

		$seen      = array();
		$to_remove = array();
		$is_quirks = self::QUIRKS_MODE === $this->compat_mode;

		foreach ( $this->classname_updates as $updated_name => $action ) {
			if ( self::REMOVE_CLASS !== $action ) {
				continue;
			}

			// Array keys such as "123" come back as integers; compare as strings.
			$updated_name = (string) $updated_name;
			$to_remove[]  = $is_quirks ? strtolower( $updated_name ) : $updated_name;
		}

		// Remove unwanted classes by only copying the new ones.
		$existing_class_length = strlen( $existing_class );
		while ( $at < $existing_class_length ) {
			// Skip to the first non-whitespace character.
			$ws_at     = $at;
			$ws_length = strspn( $existing_class, " \t\f\r\n", $ws_at );
			$at       += $ws_length;

			// Capture the class name – it's everything until the next whitespace.
			$name_length = strcspn( $existing_class, " \t\f\r\n", $at );
			if ( 0 === $name_length ) {
				// If no more class names are found then that's the end.
				break;
			}

			$name                  = substr( $existing_class, $at, $name_length );
			$comparable_class_name = $is_quirks ? strtolower( $name ) : $name;
			$at                   += $name_length;

			// If this class is marked for removal, remove it and move on to the next one.
			if ( in_array( $comparable_class_name, $to_remove, true ) ) {
				$modified = true;
				continue;
			}

			// If a class has already been seen then skip it; it should not be added twice.
			if ( in_array( $comparable_class_name, $seen, true ) ) {
				continue;
			}

			$seen[] = $comparable_class_name;

			/*
			 * Otherwise, append it to the new "class" attribute value.
			 *
			 * There are options for handling whitespace between tags.
			 * Preserving the existing whitespace produces fewer changes
			 * to the HTML content and should clarify the before/after
			 * content when debugging the modified output.
			 *
			 * This approach contrasts normalizing the inter-class
			 * whitespace to a single space, which might appear cleaner
			 * in the output HTML but produce a noisier change.
			 */
			if ( '' !== $class ) {
				$class .= substr( $existing_class, $ws_at, $ws_length );
			}
			$class .= $name;
		}

		// Add new classes by appending those which haven't already been seen.
		foreach ( $this->classname_updates as $name => $operation ) {
			if ( self::ADD_CLASS !== $operation ) {
				continue;
			}

			// Array keys such as "123" come back as integers; compare as strings.
			$name            = (string) $name;
			$comparable_name = $is_quirks ? strtolower( $name ) : $name;
			if ( in_array( $comparable_name, $seen, true ) ) {
				continue;
			}

			$modified = true;

			$class .= strlen( $class ) > 0 ? ' ' : '';
			$class .= $name;
		}

		$this->classname_updates = array();
		if ( ! $modified ) {
			return;
		}

		if ( strlen( $class ) > 0 ) {
			$this->set_attribute( 'class', $class );
		} else {
			$this->remove_attribute( 'class' );
		}
	}

	/**
	 * Applies attribute updates to HTML document.
	 *
	 * The queued replacements are sorted by start offset and spliced into the
	 * document in one pass. The internal cursor and the offset passed in are
	 * shifted for every replacement starting before them, and each bookmark is
	 * either shifted accordingly or released when a replacement covers it.
	 *
	 * @since 6.2.0
	 * @since 6.2.1 Accumulates shift for internal cursor and passed pointer.
	 * @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
	 *
	 * @param int $shift_this_point Accumulate and return shift for this position.
	 * @return int How many bytes the given pointer moved in response to the updates.
	 */
	private function apply_attributes_updates( int $shift_this_point ): int {
		if ( ! count( $this->lexical_updates ) ) {
			return 0;
		}

		$accumulated_shift_for_given_point = 0;

		/*
		 * Attribute updates can be enqueued in any order but updates
		 * to the document must occur in lexical order; that is, each
		 * replacement must be made before all others which follow it
		 * at later string indices in the input document.
		 *
		 * Sorting avoid making out-of-order replacements which
		 * can lead to mangled output, partially-duplicated
		 * attributes, and overwritten attributes.
		 */
		usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );

		$bytes_already_copied = 0;
		$output_buffer        = '';
		foreach ( $this->lexical_updates as $diff ) {
			$shift = strlen( $diff->text ) - $diff->length;

			// Adjust the cursor position by however much an update affects it.
			if ( $diff->start < $this->bytes_already_parsed ) {
				$this->bytes_already_parsed += $shift;
			}

			// Accumulate shift of the given pointer within this function call.
			if ( $diff->start < $shift_this_point ) {
				$accumulated_shift_for_given_point += $shift;
			}

			$output_buffer       .= substr( $this->html, $bytes_already_copied, $diff->start - $bytes_already_copied );
			$output_buffer       .= $diff->text;
			$bytes_already_copied = $diff->start + $diff->length;
		}

		$this->html = $output_buffer . substr( $this->html, $bytes_already_copied );

		/*
		 * Adjust bookmark locations to account for how the text
		 * replacements adjust offsets in the input document.
		 */
		foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
			$bookmark_end = $bookmark->start + $bookmark->length;

			/*
			 * Each lexical update which appears before the bookmark's endpoints
			 * might shift the offsets for those endpoints. Loop through each change
			 * and accumulate the total shift for each bookmark, then apply that
			 * shift after tallying the full delta.
			 */
			$head_delta = 0;
			$tail_delta = 0;

			foreach ( $this->lexical_updates as $diff ) {
				$diff_end = $diff->start + $diff->length;

				// Updates are sorted, so nothing from here on can affect this bookmark.
				if ( $bookmark->start < $diff->start && $bookmark_end < $diff->start ) {
					break;
				}

				// The update covers the bookmark; it no longer has a target.
				if ( $bookmark->start >= $diff->start && $bookmark_end < $diff_end ) {
					$this->release_bookmark( $bookmark_name );
					continue 2;
				}

				$delta = strlen( $diff->text ) - $diff->length;

				if ( $bookmark->start >= $diff->start ) {
					$head_delta += $delta;
				}

				if ( $bookmark_end >= $diff_end ) {
					$tail_delta += $delta;
				}
			}

			$bookmark->start  += $head_delta;
			$bookmark->length += $tail_delta - $head_delta;
		}

		$this->lexical_updates = array();

		return $accumulated_shift_for_given_point;
	}

	/**
	 * Checks whether a bookmark with the given name exists.
	 *
	 * @since 6.3.0
	 *
	 * @param string $bookmark_name Name to identify a bookmark that potentially exists.
	 * @return bool Whether that bookmark exists.
*/
	public function has_bookmark( $bookmark_name ): bool {
		$is_registered = array_key_exists( $bookmark_name, $this->bookmarks );

		return $is_registered;
	}

	/**
	 * Moves the internal cursor of the Tag Processor to the location of a bookmark.
	 *
	 * Pending updates are flushed into the document first, then parsing
	 * restarts at the bookmark's start so that the bookmarked token becomes
	 * the matched token again.
	 *
	 * To guard against accidental infinite loops the number of calls to
	 * seek() is capped at `MAX_SEEK_OPS`; calls beyond the cap are refused.
	 *
	 * @since 6.2.0
	 *
	 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
	 * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
	 */
	public function seek( $bookmark_name ): bool {
		$is_known_bookmark = array_key_exists( $bookmark_name, $this->bookmarks );
		if ( ! $is_known_bookmark ) {
			_doing_it_wrong(
				__METHOD__,
				__( 'Unknown bookmark name.' ),
				'6.2.0'
			);
			return false;
		}

		// Every call with a known bookmark counts against the budget, including the refused ones.
		$this->seek_count += 1;
		if ( $this->seek_count > static::MAX_SEEK_OPS ) {
			_doing_it_wrong(
				__METHOD__,
				__( 'Too many calls to seek() - this can lead to performance issues.' ),
				'6.2.0'
			);
			return false;
		}

		// Flush out any pending updates to the document.
		$this->get_updated_html();

		// Rewind to just before the sought token, then consume it.
		$destination                = $this->bookmarks[ $bookmark_name ];
		$this->bytes_already_parsed = $destination->start;
		$this->parser_state         = self::STATE_READY;

		return $this->next_token();
	}

	/**
	 * Orders two WP_HTML_Text_Replacement objects for sorting.
	 *
	 * Replacements are ordered by where they start; ties are broken by
	 * comparing their replacement text and then their lengths.
	 *
	 * @since 6.2.0
	 *
	 * @param WP_HTML_Text_Replacement $a First attribute update.
	 * @param WP_HTML_Text_Replacement $b Second attribute update.
	 * @return int Comparison value for string order.
	 */
	private static function sort_start_ascending( WP_HTML_Text_Replacement $a, WP_HTML_Text_Replacement $b ): int {
		$start_difference = $a->start - $b->start;
		if ( 0 === $start_difference ) {
			$text_difference = 0;
			if ( isset( $a->text, $b->text ) ) {
				$text_difference = strcmp( $a->text, $b->text );
			}

			/*
			 * Falling back to the lengths should never be necessary: it means
			 * both replacements start at the same offset with the same text.
			 */
			return 0 !== $text_difference
				? $text_difference
				: $a->length - $b->length;
		}

		return $start_difference;
	}

	/**
	 * Returns the value of a pending update for the given attribute, if there is one.
	 *
	 * The return value encodes four different situations:
	 *
	 *  - `false` when no update is enqueued for the attribute (or no tag is matched).
	 *  - `null` when the attribute is enqueued for removal.
	 *  - `true` when the enqueued update sets a boolean attribute.
	 *  - a string holding the decoded value for any other enqueued update.
	 *
	 * @since 6.2.0
	 *
	 * @param string $comparable_name The attribute name in its comparable form.
	 * @return string|boolean|null Value of enqueued update if present, otherwise false.
	 */
	private function get_enqueued_attribute_value( string $comparable_name ) {
		if (
			self::STATE_MATCHED_TAG !== $this->parser_state ||
			! isset( $this->lexical_updates[ $comparable_name ] )
		) {
			return false;
		}

		$pending_text = $this->lexical_updates[ $comparable_name ]->text;

		// A removal replaces the whole attribute span with nothing.
		if ( '' === $pending_text ) {
			return null;
		}

		/*
		 * A boolean update consists of the attribute name alone, while an
		 * update carrying a value always contains `=`, so a missing `=`
		 * identifies the boolean case.
		 *
		 * The name inside the update keeps the casing that was passed to
		 * `set_attribute()`, which is why it is not compared against the
		 * comparable name here.
		 */
		$equals_offset = strpos( $pending_text, '=' );
		if ( false === $equals_offset ) {
			return true;
		}

		/*
		 * What remains is an update of the form `name="value"`, e.g. `type="text"`.
		 * The raw value sits between the opening quote, one byte after the `=`,
		 * and the closing quote, which is the last byte of the update.
		 */
		$raw_value = substr( $pending_text, $equals_offset + 2, -1 );

		return WP_HTML_Decoder::decode_attribute( $raw_value );
	}

	/**
	 * Returns the value of a requested attribute from a matched tag opener if that attribute exists.
	 *
	 * Example:
	 *
	 *     $p = new WP_HTML_Tag_Processor( '
Test
' );
	 *     $p->next_tag( array( 'class_name' => 'test' ) ) === true;
	 *     $p->get_attribute( 'data-test-id' ) === '14';
	 *     $p->get_attribute( 'enabled' ) === true;
	 *     $p->get_attribute( 'aria-label' ) === null;
	 *
	 *     $p->next_tag() === false;
	 *     $p->get_attribute( 'class' ) === null;
	 *
	 * The attribute name is matched ASCII case-insensitively. This includes the
	 * `class` attribute: `get_attribute( 'CLASS' )` reflects pending `add_class()`
	 * and `remove_class()` calls exactly like `get_attribute( 'class' )` does.
	 *
	 * @since 6.2.0
	 *
	 * @param string $name Name of attribute whose value is requested.
	 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
	 */
	public function get_attribute( $name ) {
		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
			return null;
		}

		$comparable = strtolower( $name );

		/*
		 * For every attribute other than `class` it's possible to perform a quick check if
		 * there's an enqueued lexical update whose value takes priority over what's found in
		 * the input document.
		 *
		 * The `class` attribute is special though because of the exposed helpers `add_class`
		 * and `remove_class`. These form a builder for the `class` attribute, so an additional
		 * check for enqueued class changes is required in addition to the check for any enqueued
		 * attribute values. If any exist, those enqueued class changes must first be flushed out
		 * into an attribute value update.
		 *
		 * The comparable (lower-cased) name is tested so that case variants such as
		 * `CLASS` or `Class` flush the pending class changes as well; every other
		 * lookup in this method already uses the comparable name.
		 */
		if ( 'class' === $comparable ) {
			$this->class_name_updates_to_attributes_updates();
		}

		// Return any enqueued attribute value updates if they exist.
		$enqueued_value = $this->get_enqueued_attribute_value( $comparable );
		if ( false !== $enqueued_value ) {
			return $enqueued_value;
		}

		if ( ! isset( $this->attributes[ $comparable ] ) ) {
			return null;
		}

		$attribute = $this->attributes[ $comparable ];

		/*
		 * This flag distinguishes an attribute with no value
		 * from an attribute with an empty string value. For
		 * unquoted attributes this could look very similar.
		 * It refers to whether an `=` follows the name.
		 *
		 * e.g. <div boolean-attribute empty-attribute=></div>
		 *           ¹                 ²
		 *        1. Attribute `boolean-attribute` is `true`.
		 *        2. Attribute `empty-attribute` is `""`.
		 */
		if ( true === $attribute->is_true ) {
			return true;
		}

		$raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length );

		return WP_HTML_Decoder::decode_attribute( $raw_value );
	}

	/**
	 * Gets lowercase names of all attributes matching a given prefix in the current tag.
	 *
	 * Note that matching is case-insensitive. This is in accordance with the spec:
	 *
	 * > There must never be two or more attributes on
	 * > the same start tag whose names are an ASCII
	 * > case-insensitive match for each other.
	 *     - HTML 5 spec
	 *
	 * Example:
	 *
	 *     $p = new WP_HTML_Tag_Processor( '
Test
' );
	 *     $p->next_tag( array( 'class_name' => 'test' ) ) === true;
	 *     $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-enabled', 'data-test-id' );
	 *
	 *     $p->next_tag() === false;
	 *     $p->get_attribute_names_with_prefix( 'data-' ) === null;
	 *
	 * @since 6.2.0
	 *
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
	 *
	 * @param string $prefix Prefix of requested attribute names.
	 * @return array|null List of attribute names, or `null` when no tag opener is matched.
	 */
	public function get_attribute_names_with_prefix( $prefix ): ?array {
		$is_on_tag_opener = self::STATE_MATCHED_TAG === $this->parser_state && ! $this->is_closing_tag;
		if ( ! $is_on_tag_opener ) {
			return null;
		}

		// Attribute names are stored in lowercase, so the prefix is lower-cased to compare.
		$lowercase_prefix = strtolower( $prefix );
		$matching_names   = array();

		foreach ( $this->attributes as $attribute_name => $unused_attribute ) {
			if ( ! str_starts_with( $attribute_name, $lowercase_prefix ) ) {
				continue;
			}

			$matching_names[] = $attribute_name;
		}

		return $matching_names;
	}

	/**
	 * Reports the namespace the matched token was parsed in.
	 *
	 * @since 6.7.0
	 *
	 * @return string One of 'html', 'math', or 'svg'.
	 */
	public function get_namespace(): string {
		$current_namespace = $this->parsing_namespace;

		return $current_namespace;
	}

	/**
	 * Returns the uppercase name of the matched tag.
	 *
	 * Example:
	 *
	 *     $p = new WP_HTML_Tag_Processor( '
Test
' );
	 *     $p->next_tag() === true;
	 *     $p->get_tag() === 'DIV';
	 *
	 *     $p->next_tag() === false;
	 *     $p->get_tag() === null;
	 *
	 * When matched on a comment that looks like a processing instruction,
	 * the instruction's target is returned with its casing left as written.
	 *
	 * @since 6.2.0
	 *
	 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
	 */
	public function get_tag(): ?string {
		if ( null === $this->tag_name_starts_at ) {
			return null;
		}

		$name_as_written = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );

		// Tags report their name in uppercase.
		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
			return strtoupper( $name_as_written );
		}

		// Among the other tokens only processing-instruction lookalikes carry a name.
		$is_pi_lookalike = (
			self::STATE_COMMENT === $this->parser_state &&
			self::COMMENT_AS_PI_NODE_LOOKALIKE === $this->get_comment_type()
		);

		return $is_pi_lookalike ? $name_as_written : null;
	}

	/**
	 * Returns the tag name adjusted for the namespace of the matched token.
	 *
	 * HTML tags keep the uppercase name reported by `get_tag()`, MathML
	 * tags are lower-cased, and SVG tags are lower-cased unless they are
	 * one of the SVG elements with a mixed-case name, in which case that
	 * mixed-case name is returned.
	 *
	 * @since 6.7.0
	 *
	 * @return string|null Name of current tag name.
	 */
	public function get_qualified_tag_name(): ?string {
		$tag_name = $this->get_tag();
		if ( null === $tag_name ) {
			return null;
		}

		$namespace = $this->get_namespace();
		if ( 'html' === $namespace ) {
			return $tag_name;
		}

		$lower_tag_name = strtolower( $tag_name );
		if ( 'math' === $namespace ) {
			return $lower_tag_name;
		}

		// Any namespace other than the three known ones leaves the name untouched.
		if ( 'svg' !== $namespace ) {
			return $tag_name;
		}

		// SVG element names whose canonical form contains uppercase letters.
		$mixed_case_svg_names = array(
			'altglyph'            => 'altGlyph',
			'altglyphdef'         => 'altGlyphDef',
			'altglyphitem'        => 'altGlyphItem',
			'animatecolor'        => 'animateColor',
			'animatemotion'       => 'animateMotion',
			'animatetransform'    => 'animateTransform',
			'clippath'            => 'clipPath',
			'feblend'             => 'feBlend',
			'fecolormatrix'       => 'feColorMatrix',
			'fecomponenttransfer' => 'feComponentTransfer',
			'fecomposite'         => 'feComposite',
			'feconvolvematrix'    => 'feConvolveMatrix',
			'fediffuselighting'   => 'feDiffuseLighting',
			'fedisplacementmap'   => 'feDisplacementMap',
			'fedistantlight'      => 'feDistantLight',
			'fedropshadow'        => 'feDropShadow',
			'feflood'             => 'feFlood',
			'fefunca'             => 'feFuncA',
			'fefuncb'             => 'feFuncB',
			'fefuncg'             => 'feFuncG',
			'fefuncr'             => 'feFuncR',
			'fegaussianblur'      => 'feGaussianBlur',
			'feimage'             => 'feImage',
			'femerge'             => 'feMerge',
			'femergenode'         => 'feMergeNode',
			'femorphology'        => 'feMorphology',
			'feoffset'            => 'feOffset',
			'fepointlight'        => 'fePointLight',
			'fespecularlighting'  => 'feSpecularLighting',
			'fespotlight'         => 'feSpotLight',
			'fetile'              => 'feTile',
			'feturbulence'        => 'feTurbulence',
			'foreignobject'       => 'foreignObject',
			'glyphref'            => 'glyphRef',
			'lineargradient'      => 'linearGradient',
			'radialgradient'      => 'radialGradient',
			'textpath'            => 'textPath',
		);

		return $mixed_case_svg_names[ $lower_tag_name ] ?? $lower_tag_name;
	}

	/**
	 * Returns the adjusted attribute name for a given attribute, taking into
	 * account the current parsing context, whether HTML, SVG, or MathML.
	 *
	 * @since 6.7.0
	 *
	 * @param string $attribute_name Which attribute to adjust.
*
	 * @return string|null Adjusted attribute name, or `null` when not matched on a tag.
	 */
	public function get_qualified_attribute_name( $attribute_name ): ?string {
		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
			return null;
		}

		$namespace  = $this->get_namespace();
		$lower_name = strtolower( $attribute_name );

		// MathML adjusts exactly one attribute name.
		if ( 'math' === $namespace && 'definitionurl' === $lower_name ) {
			return 'definitionURL';
		}

		if ( 'svg' === $namespace ) {
			// SVG attribute names whose canonical form contains uppercase letters.
			$mixed_case_svg_attributes = array(
				'attributename'       => 'attributeName',
				'attributetype'       => 'attributeType',
				'basefrequency'       => 'baseFrequency',
				'baseprofile'         => 'baseProfile',
				'calcmode'            => 'calcMode',
				'clippathunits'       => 'clipPathUnits',
				'diffuseconstant'     => 'diffuseConstant',
				'edgemode'            => 'edgeMode',
				'filterunits'         => 'filterUnits',
				'glyphref'            => 'glyphRef',
				'gradienttransform'   => 'gradientTransform',
				'gradientunits'       => 'gradientUnits',
				'kernelmatrix'        => 'kernelMatrix',
				'kernelunitlength'    => 'kernelUnitLength',
				'keypoints'           => 'keyPoints',
				'keysplines'          => 'keySplines',
				'keytimes'            => 'keyTimes',
				'lengthadjust'        => 'lengthAdjust',
				'limitingconeangle'   => 'limitingConeAngle',
				'markerheight'        => 'markerHeight',
				'markerunits'         => 'markerUnits',
				'markerwidth'         => 'markerWidth',
				'maskcontentunits'    => 'maskContentUnits',
				'maskunits'           => 'maskUnits',
				'numoctaves'          => 'numOctaves',
				'pathlength'          => 'pathLength',
				'patterncontentunits' => 'patternContentUnits',
				'patterntransform'    => 'patternTransform',
				'patternunits'        => 'patternUnits',
				'pointsatx'           => 'pointsAtX',
				'pointsaty'           => 'pointsAtY',
				'pointsatz'           => 'pointsAtZ',
				'preservealpha'       => 'preserveAlpha',
				'preserveaspectratio' => 'preserveAspectRatio',
				'primitiveunits'      => 'primitiveUnits',
				'refx'                => 'refX',
				'refy'                => 'refY',
				'repeatcount'         => 'repeatCount',
				'repeatdur'           => 'repeatDur',
				'requiredextensions'  => 'requiredExtensions',
				'requiredfeatures'    => 'requiredFeatures',
				'specularconstant'    => 'specularConstant',
				'specularexponent'    => 'specularExponent',
				'spreadmethod'        => 'spreadMethod',
				'startoffset'         => 'startOffset',
				'stddeviation'        => 'stdDeviation',
				'stitchtiles'         => 'stitchTiles',
				'surfacescale'        => 'surfaceScale',
				'systemlanguage'      => 'systemLanguage',
				'tablevalues'         => 'tableValues',
				'targetx'             => 'targetX',
				'targety'             => 'targetY',
				'textlength'          => 'textLength',
				'viewbox'             => 'viewBox',
				'viewtarget'          => 'viewTarget',
				'xchannelselector'    => 'xChannelSelector',
				'ychannelselector'    => 'yChannelSelector',
				'zoomandpan'          => 'zoomAndPan',
			);

			if ( isset( $mixed_case_svg_attributes[ $lower_name ] ) ) {
				return $mixed_case_svg_attributes[ $lower_name ];
			}
		}

		if ( 'html' !== $namespace ) {
			// Prefixed attributes in foreign content: the colon becomes a space.
			$foreign_attributes = array(
				'xlink:actuate' => 'xlink actuate',
				'xlink:arcrole' => 'xlink arcrole',
				'xlink:href'    => 'xlink href',
				'xlink:role'    => 'xlink role',
				'xlink:show'    => 'xlink show',
				'xlink:title'   => 'xlink title',
				'xlink:type'    => 'xlink type',
				'xml:lang'      => 'xml lang',
				'xml:space'     => 'xml space',
				'xmlns'         => 'xmlns',
				'xmlns:xlink'   => 'xmlns xlink',
			);

			if ( isset( $foreign_attributes[ $lower_name ] ) ) {
				return $foreign_attributes[ $lower_name ];
			}
		}

		// No adjustment applies: hand the name back exactly as it was given.
		return $attribute_name;
	}

	/**
	 * Indicates if the currently matched tag contains the self-closing flag.
	 *
	 * No HTML elements ought to have the self-closing flag and for those, the self-closing
	 * flag will be ignored. For void elements this is benign because they "self close"
	 * automatically. For non-void HTML elements though problems will appear if someone
	 * intends to use a self-closing element in place of that element with an empty body.
* For HTML foreign elements and custom elements the self-closing flag determines if
	 * they self-close or not.
	 *
	 * This function does not determine if a tag is self-closing,
	 * but only if the self-closing flag is present in the syntax.
	 *
	 * @since 6.3.0
	 *
	 * @return bool Whether the currently matched tag contains the self-closing flag.
	 */
	public function has_self_closing_flag(): bool {
		/*
		 * The self-closing flag is the solidus at the _end_ of the tag, not the beginning.
		 *
		 * Example:
		 *
		 *     <figure />
		 *             ^ this appears one character before the end of the closing ">".
		 */
		return (
			self::STATE_MATCHED_TAG === $this->parser_state &&
			'/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ]
		);
	}

	/**
	 * Indicates if the current tag token is a tag closer.
	 *
	 * Example:
	 *
	 *     $p = new WP_HTML_Tag_Processor( '
' );
	 *     $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit' ) );
	 *     $p->is_tag_closer() === false;
	 *
	 *     $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit' ) );
	 *     $p->is_tag_closer() === true;
	 *
	 * @since 6.2.0
	 * @since 6.7.0 Reports all BR tags as opening tags.
	 *
	 * @return bool Whether the current tag is a tag closer.
	 */
	public function is_tag_closer(): bool {
		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
			return false;
		}

		if ( ! $this->is_closing_tag ) {
			return false;
		}

		/*
		 * The BR tag can only exist as an opening tag. If something like `</br>`
		 * appears then the HTML parser will treat it as an opening tag with no
		 * attributes. The BR tag is unique in this way.
		 *
		 * @see https://html.spec.whatwg.org/#parsing-main-inbody
		 */
		return 'BR' !== $this->get_tag();
	}

	/**
	 * Indicates the kind of matched token, if any.
	 *
	 * Unlike `get_token_name()`, which may return a value derived from
	 * the token itself (a tag name, for instance), this always returns a
	 * static string naming the type of token.
	 *
	 * Possible values:
	 *  - `#tag` when matched on a tag.
	 *  - `#text` when matched on a text node.
	 *  - `#cdata-section` when matched on a CDATA node.
	 *  - `#comment` when matched on a comment.
	 *  - `#doctype` when matched on a DOCTYPE declaration.
	 *  - `#presumptuous-tag` when matched on an empty tag closer.
	 *  - `#funky-comment` when matched on a funky comment.
	 *
	 * @since 6.5.0
	 *
	 * @return string|null What kind of token is matched, or null.
	 */
	public function get_token_type(): ?string {
		// Tags and DOCTYPEs are the two tokens whose name is not already their type.
		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
			return '#tag';
		}

		if ( self::STATE_DOCTYPE === $this->parser_state ) {
			return '#doctype';
		}

		return $this->get_token_name();
	}

	/**
	 * Returns the node name represented by the token.
	 *
	 * This matches the DOM API value `nodeName`. Some values
	 * are static, such as `#text` for a text node, while others
	 * are dynamically generated from the token itself.
	 *
	 * Dynamic names:
	 *  - Uppercase tag name for tag matches.
	 *  - `html` for DOCTYPE declarations.
	 *
	 * Note that if the Tag Processor is not matched on a token
	 * then this function will return `null`, either because it
	 * hasn't yet found a token or because it reached the end
	 * of the document without matching a token.
	 *
	 * @since 6.5.0
	 *
	 * @return string|null Name of the matched token.
*/
	public function get_token_name(): ?string {
		// Tags are the only tokens whose name is read from the document.
		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
			return $this->get_tag();
		}

		$static_names = array(
			self::STATE_TEXT_NODE        => '#text',
			self::STATE_CDATA_NODE       => '#cdata-section',
			self::STATE_COMMENT          => '#comment',
			self::STATE_DOCTYPE          => 'html',
			self::STATE_PRESUMPTUOUS_TAG => '#presumptuous-tag',
			self::STATE_FUNKY_COMMENT    => '#funky-comment',
		);

		return $static_names[ $this->parser_state ] ?? null;
	}

	/**
	 * Indicates what kind of comment produced the comment node.
	 *
	 * Several kinds of HTML syntax end up as comments: besides regular
	 * HTML comments as they are commonly known, a number of unrelated
	 * syntax errors produce comment nodes too. The Tag Processor records
	 * which one was encountered and exposes it here as the comment's type.
	 *
	 * @see self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT
	 * @see self::COMMENT_AS_CDATA_LOOKALIKE
	 * @see self::COMMENT_AS_INVALID_HTML
	 * @see self::COMMENT_AS_HTML_COMMENT
	 * @see self::COMMENT_AS_PI_NODE_LOOKALIKE
	 *
	 * @since 6.5.0
	 *
	 * @return string|null The comment type, or `null` when not matched on a comment.
	 */
	public function get_comment_type(): ?string {
		return self::STATE_COMMENT === $this->parser_state
			? $this->comment_type
			: null;
	}

	/**
	 * Returns the text of a matched comment or null if not on a comment type node.
	 *
	 * This method returns the entire text content of a comment node as it
	 * would appear in the browser.
	 *
	 * This differs from {@see ::get_modifiable_text()} in that certain comment
	 * types in the HTML API cannot allow their entire comment text content to
	 * be modified. Namely, "bogus comments" of the form `<?not allowed in html>`
	 * will create a comment whose text content starts with `?`. Note that if
	 * that character were modified, it would be possible to change the node
	 * type.
	 *
	 * @since 6.7.0
	 *
	 * @return string|null The comment text as it would appear in the browser or null
	 *                     if not on a comment type node.
*/
	public function get_full_comment_text(): ?string {
		// Funky comments expose their whole content as modifiable text.
		if ( self::STATE_FUNKY_COMMENT === $this->parser_state ) {
			return $this->get_modifiable_text();
		}

		if ( self::STATE_COMMENT !== $this->parser_state ) {
			return null;
		}

		switch ( $this->get_comment_type() ) {
			case self::COMMENT_AS_HTML_COMMENT:
			case self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
				return $this->get_modifiable_text();

			// The `[CDATA[` and `]]` delimiters are part of the comment text.
			case self::COMMENT_AS_CDATA_LOOKALIKE:
				return "[CDATA[{$this->get_modifiable_text()}]]";

			// The surrounding `?` marks and the target name are part of the comment text.
			case self::COMMENT_AS_PI_NODE_LOOKALIKE:
				return "?{$this->get_tag()}{$this->get_modifiable_text()}?";

			/*
			 * This represents "bogus comments state" from HTML tokenization.
			 *
			 * When such a comment is opened by `<?` the `?` belongs to the
			 * comment text even though it is not part of the modifiable text.
			 * The byte directly in front of the modifiable text tells whether
			 * that was the case.
			 */
			case self::COMMENT_AS_INVALID_HTML:
				$preceding_character = $this->html[ $this->text_starts_at - 1 ];
				$comment_start       = '?' === $preceding_character ? '?' : '';
				return "{$comment_start}{$this->get_modifiable_text()}";
		}

		return null;
	}

	/**
	 * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as
	 * distinct nodes prefixes.
	 *
	 * Note that once anything that's neither a NULL byte nor decoded whitespace is
	 * encountered, then the remainder of the text node is left intact as generic text.
	 *
	 *  - The HTML Processor uses this to apply distinct rules for different kinds of text.
	 *  - Inter-element whitespace can be detected and skipped with this method.
	 *
	 * Text nodes aren't eagerly subdivided because there's no need to split them unless
	 * decisions are being made on NULL byte sequences or whitespace-only text.
	 *
	 * Example:
	 *
	 *     $processor = new WP_HTML_Tag_Processor( "\x00Apples & Oranges" );
	 *     true  === $processor->next_token();                   // Text is "Apples & Oranges".
	 *     true  === $processor->subdivide_text_appropriately(); // Text is "".
	 *     true  === $processor->next_token();                   // Text is "Apples & Oranges".
	 *     false === $processor->subdivide_text_appropriately();
	 *
	 *     $processor = new WP_HTML_Tag_Processor( " \r\n\tMore" );
	 *     true  === $processor->next_token();                   // Text is "␤ ␤␉More".
	 *     true  === $processor->subdivide_text_appropriately(); // Text is "␤ ␤␉".
*     true  === $processor->next_token();                   // Text is "More".
	 *     false === $processor->subdivide_text_appropriately();
	 *
	 * @since 6.7.0
	 *
	 * @return bool Whether the text node was subdivided.
	 */
	public function subdivide_text_appropriately(): bool {
		if ( self::STATE_TEXT_NODE !== $this->parser_state ) {
			return false;
		}

		// Until a special prefix is found the text counts as generic.
		$this->text_node_classification = self::TEXT_IS_GENERIC;

		/*
		 * Only raw NULL bytes are counted here. A numeric character reference
		 * whose number is zero is categorically different from a `"\x00"` byte.
		 */
		$null_run_length = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
		if ( $null_run_length > 0 ) {
			// Shrink the token to the run of NULL bytes; the rest will be parsed as the next token.
			$this->token_length             = $null_run_length;
			$this->text_length              = $null_run_length;
			$this->bytes_already_parsed     = $this->token_starts_at + $null_run_length;
			$this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
			return true;
		}

		/*
		 * Walk forward over everything that is whitespace once decoded: raw
		 * whitespace bytes as well as character references which decode to
		 * a single whitespace character. The walk stops at the first thing
		 * that is neither.
		 */
		$cursor   = $this->text_starts_at;
		$text_end = $this->text_starts_at + $this->text_length;
		while ( $cursor < $text_end ) {
			$cursor += strspn( $this->html, " \t\f\r\n", $cursor, $text_end - $cursor );

			// Past the raw whitespace only a character reference can continue the run.
			if ( $cursor >= $text_end || '&' !== $this->html[ $cursor ] ) {
				break;
			}

			$reference_byte_length = null;
			$decoded_reference     = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $cursor, $reference_byte_length );
			if ( ! isset( $decoded_reference ) || 1 !== strspn( $decoded_reference, " \t\f\r\n" ) ) {
				break;
			}

			$cursor += $reference_byte_length;
		}

		// Nothing was skipped: there is no whitespace prefix to split off.
		if ( $cursor <= $this->text_starts_at ) {
			return false;
		}

		$prefix_length                  = $cursor - $this->text_starts_at;
		$this->text_length              = $prefix_length;
		$this->token_length             = $prefix_length;
		$this->bytes_already_parsed     = $cursor;
		$this->text_node_classification = self::TEXT_IS_WHITESPACE;

		return true;
	}

	/**
	 * Returns the modifiable text for a matched token, or an empty string.
*
	 * Modifiable text is text content that may be read and changed without
	 * changing the HTML structure of the document around it. This includes
	 * the contents of `#text` nodes in the HTML as well as the inner
	 * contents of HTML comments, Processing Instructions, and others, even
	 * though these nodes aren't part of a parsed DOM tree. They also contain
	 * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any
	 * other section in an HTML document which cannot contain HTML markup (DATA).
	 *
	 * If a token has no modifiable text then an empty string is returned to
	 * avoid needless crashing or type errors. An empty string does not mean
	 * that a token has modifiable text, and a token with modifiable text may
	 * have an empty string (e.g. a comment with no contents).
	 *
	 * Limitations:
	 *
	 *  - This function will not strip the leading newline appropriately
	 *    after seeking into a LISTING or PRE element. To ensure that the
	 *    newline is treated properly, seek to the LISTING or PRE opening
	 *    tag instead of to the first text node inside the element.
	 *
	 * @since 6.5.0
	 * @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately.
	 *
	 * @return string
	 */
	public function get_modifiable_text(): string {
		$has_pending_text = isset( $this->lexical_updates['modifiable text'] );

		if ( $has_pending_text ) {
			// A pending replacement takes priority over what is in the document.
			$text = $this->lexical_updates['modifiable text']->text;
		} elseif ( null === $this->text_starts_at || 0 === $this->text_length ) {
			return '';
		} else {
			$text = substr( $this->html, $this->text_starts_at, $this->text_length );
		}

		/*
		 * Newline normalization is part of pre-processing the input stream,
		 * which would normally happen before any parsing. Deferring it until
		 * the text is requested makes it possible to skip in most cases.
		 *
		 * @see https://html.spec.whatwg.org/#preprocessing-the-input-stream
		 * @see https://infra.spec.whatwg.org/#normalize-newlines
		 */
		$text = str_replace( array( "\r\n", "\r" ), "\n", $text );

		// Comment data is not decoded.
		$undecoded_states = array(
			self::STATE_CDATA_NODE,
			self::STATE_COMMENT,
			self::STATE_DOCTYPE,
			self::STATE_FUNKY_COMMENT,
		);
		if ( in_array( $this->parser_state, $undecoded_states, true ) ) {
			return str_replace( "\x00", "\u{FFFD}", $text );
		}

		$tag_name = $this->get_token_name();

		// Script data and RAWTEXT data are not decoded either.
		$undecoded_elements = array( 'SCRIPT', 'IFRAME', 'NOEMBED', 'NOFRAMES', 'STYLE', 'XMP' );
		if ( in_array( $tag_name, $undecoded_elements, true ) ) {
			return str_replace( "\x00", "\u{FFFD}", $text );
		}

		$decoded = WP_HTML_Decoder::decode_text_node( $text );

		/*
		 * Skip the first line feed after LISTING, PRE, and TEXTAREA opening tags.
		 *
		 * That first newline may be written as a character reference, which
		 * is why the check runs on the decoded text rather than the raw text.
		 */
		$starts_with_newline   = "\n" === ( $decoded[0] ?? '' );
		$drops_leading_newline = (
			( $this->skip_newline_at === $this->token_starts_at && '#text' === $tag_name ) ||
			'TEXTAREA' === $tag_name
		);
		if ( $starts_with_newline && $drops_leading_newline ) {
			$decoded = substr( $decoded, 1 );
		}

		/*
		 * Only in normative text nodes does the NULL byte (U+0000) get removed.
		 * In all other contexts it's replaced by the replacement character (U+FFFD)
		 * for security reasons (to avoid joining together strings that were safe
		 * when separated, but not when joined).
		 *
		 * @todo Inside HTML integration points and MathML integration points, the
		 *       text is processed according to the insertion mode, not according
		 *       to the foreign content rules. This should strip the NULL bytes.
		 */
		$null_byte_substitute = ( '#text' === $tag_name && 'html' === $this->get_namespace() )
			? ''
			: "\u{FFFD}";

		return str_replace( "\x00", $null_byte_substitute, $decoded );
	}

	/**
	 * Sets the modifiable text for the matched token, if matched.
	 *
	 * Modifiable text is text content that may be read and changed without
	 * changing the HTML structure of the document around it. This includes
	 * the contents of `#text` nodes in the HTML as well as the inner
	 * contents of HTML comments, Processing Instructions, and others, even
	 * though these nodes aren't part of a parsed DOM tree. They also contain
	 * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any
	 * other section in an HTML document which cannot contain HTML markup (DATA).
	 *
	 * Not all modifiable text may be set by this method, and not all content
	 * may be set as modifiable text. In the case that this fails it will return
	 * `false` indicating as much. For instance, it will not allow inserting the
	 * string `next_tag( 'STYLE' ) ) {
	 *         $style = $processor->get_modifiable_text();
	 *         $processor->set_modifiable_text( "// Made with love on the World Wide Web\n{$style}" );
	 *     }
	 *
	 *     // Replace smiley text with Emoji smilies.
	 *     while ( $processor->next_token() ) {
	 *         if ( '#text' !== $processor->get_token_name() ) {
	 *             continue;
	 *         }
	 *
	 *         $chunk = $processor->get_modifiable_text();
	 *         if ( ! str_contains( $chunk, ':)' ) ) {
	 *             continue;
	 *         }
	 *
	 *         $processor->set_modifiable_text( str_replace( ':)', '🙂', $chunk ) );
	 *     }
	 *
	 * @since 6.7.0
	 *
	 * @param string $plaintext_content New text content to represent in the matched token.
	 *
	 * @return bool Whether the text was able to update.
	 */
	public function set_modifiable_text( string $plaintext_content ): bool {
		if ( self::STATE_TEXT_NODE === $this->parser_state ) {
			$this->lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement(
				$this->text_starts_at,
				$this->text_length,
				htmlspecialchars( $plaintext_content, ENT_QUOTES | ENT_HTML5 )
			);

			return true;
		}

		// Comment data is not encoded.
if ( self::STATE_COMMENT === $this->parser_state && self::COMMENT_AS_HTML_COMMENT === $this->comment_type ) { // Check if the text could close the comment. if ( 1 === preg_match( '/--!?>/', $plaintext_content ) ) { return false; } $this->lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement( $this->text_starts_at, $this->text_length, $plaintext_content ); return true; } if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { return false; } switch ( $this->get_tag() ) { case 'SCRIPT': /* * This is over-protective, but ensures the update doesn't break * out of the SCRIPT element. A more thorough check would need to * ensure that the script closing tag doesn't exist, and isn't * also "hidden" inside the script double-escaped state. * * It may seem like replacing `lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement( $this->text_starts_at, $this->text_length, $plaintext_content ); return true; case 'STYLE': $plaintext_content = preg_replace_callback( '~style)~i', static function ( $tag_match ) { return "\\3c\\2f{$tag_match['TAG_NAME']}"; }, $plaintext_content ); $this->lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement( $this->text_starts_at, $this->text_length, $plaintext_content ); return true; case 'TEXTAREA': case 'TITLE': $plaintext_content = preg_replace_callback( "~{$this->get_tag()})~i", static function ( $tag_match ) { return "</{$tag_match['TAG_NAME']}"; }, $plaintext_content ); /* * These don't _need_ to be escaped, but since they are decoded it's * safe to leave them escaped and this can prevent other code from * naively detecting tags within the contents. * * @todo It would be useful to prefix a multiline replacement text * with a newline, but not necessary. This is for aesthetics. 
*/ $this->lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement( $this->text_starts_at, $this->text_length, $plaintext_content ); return true; } return false; } /** * Updates or creates a new attribute on the currently matched tag with the passed value. * * For boolean attributes special handling is provided: * - When `true` is passed as the value, then only the attribute name is added to the tag. * - When `false` is passed, the attribute gets removed if it existed before. * * For string attributes, the value is escaped using the `esc_attr` function. * * @since 6.2.0 * @since 6.2.1 Fix: Only create a single update for multiple calls with case-variant attribute names. * * @param string $name The attribute name to target. * @param string|bool $value The new attribute value. * @return bool Whether an attribute value was set. */ public function set_attribute( $name, $value ): bool { if ( self::STATE_MATCHED_TAG !== $this->parser_state || $this->is_closing_tag ) { return false; } /* * WordPress rejects more characters than are strictly forbidden * in HTML5. This is to prevent additional security risks deeper * in the WordPress and plugin stack. Specifically the * less-than (<) greater-than (>) and ampersand (&) aren't allowed. * * The use of a PCRE match enables looking for specific Unicode * code points without writing a UTF-8 decoder. Whereas scanning * for one-byte characters is trivial (with `strcspn`), scanning * for the longer byte sequences would be more complicated. Given * that this shouldn't be in the hot path for execution, it's a * reasonable compromise in efficiency without introducing a * noticeable impact on the overall system. * * @see https://html.spec.whatwg.org/#attributes-2 * * @todo As the only regex pattern maybe we should take it out? * Are Unicode patterns available broadly in Core? */ if ( preg_match( '~[' . // Syntax-like characters. '"\'>& The values "true" and "false" are not allowed on boolean attributes. 
* > To represent a false value, the attribute has to be omitted altogether. * - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes */ if ( false === $value ) { return $this->remove_attribute( $name ); } if ( true === $value ) { $updated_attribute = $name; } else { $comparable_name = strtolower( $name ); /* * Escape URL attributes. * * @see https://html.spec.whatwg.org/#attributes-3 */ $escaped_new_value = in_array( $comparable_name, wp_kses_uri_attributes(), true ) ? esc_url( $value ) : esc_attr( $value ); // If the escaping functions wiped out the update, reject it and indicate it was rejected. if ( '' === $escaped_new_value && '' !== $value ) { return false; } $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. * - HTML 5 spec * * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive */ $comparable_name = strtolower( $name ); if ( isset( $this->attributes[ $comparable_name ] ) ) { /* * Update an existing attribute. * * Example – set attribute id to "new" in
<div id="initial_id" />: * * <div id="initial_id"/>
* ^-------------^ * start end * replacement: `id="new"` * * Result: <div id="new"/>
*/ $existing_attribute = $this->attributes[ $comparable_name ]; $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $existing_attribute->start, $existing_attribute->length, $updated_attribute ); } else { /* * Create a new attribute at the tag's name end. * * Example – add attribute id="new" to
<div />: * * <div/>
* ^ * start and end * replacement: ` id="new"` * * Result: <div id="new"/>
*/ $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $this->tag_name_starts_at + $this->tag_name_length, 0, ' ' . $updated_attribute ); } /* * Any calls to update the `class` attribute directly should wipe out any * enqueued class changes from `add_class` and `remove_class`. */ if ( 'class' === $comparable_name && ! empty( $this->classname_updates ) ) { $this->classname_updates = array(); } return true; } /** * Remove an attribute from the currently-matched tag. * * @since 6.2.0 * * @param string $name The attribute name to remove. * @return bool Whether an attribute was removed. */ public function remove_attribute( $name ): bool { if ( self::STATE_MATCHED_TAG !== $this->parser_state || $this->is_closing_tag ) { return false; } /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. * - HTML 5 spec * * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive */ $name = strtolower( $name ); /* * Any calls to update the `class` attribute directly should wipe out any * enqueued class changes from `add_class` and `remove_class`. */ if ( 'class' === $name && count( $this->classname_updates ) !== 0 ) { $this->classname_updates = array(); } /* * If updating an attribute that didn't exist in the input * document, then remove the enqueued update and move on. * * For example, this might occur when calling `remove_attribute()` * after calling `set_attribute()` for the same attribute * and when that attribute wasn't originally present. */ if ( ! isset( $this->attributes[ $name ] ) ) { if ( isset( $this->lexical_updates[ $name ] ) ) { unset( $this->lexical_updates[ $name ] ); } return false; } /* * Removes an existing tag attribute. * * Example – remove the attribute id from
<div id="initial_id"/>: * <div id="initial_id"/>
* ^-------------^ * start end * replacement: `` * * Result: <div />
*/ $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( $this->attributes[ $name ]->start, $this->attributes[ $name ]->length, '' ); // Removes any duplicated attributes if they were also present. foreach ( $this->duplicate_attributes[ $name ] ?? array() as $attribute_token ) { $this->lexical_updates[] = new WP_HTML_Text_Replacement( $attribute_token->start, $attribute_token->length, '' ); } return true; } /** * Adds a new class name to the currently matched tag. * * @since 6.2.0 * * @param string $class_name The class name to add. * @return bool Whether the class was set to be added. */ public function add_class( $class_name ): bool { if ( self::STATE_MATCHED_TAG !== $this->parser_state || $this->is_closing_tag ) { return false; } if ( self::QUIRKS_MODE !== $this->compat_mode ) { $this->classname_updates[ $class_name ] = self::ADD_CLASS; return true; } /* * Because class names are matched ASCII-case-insensitively in quirks mode, * this needs to see if a case variant of the given class name is already * enqueued and update that existing entry, if so. This picks the casing of * the first-provided class name for all lexical variations. */ $class_name_length = strlen( $class_name ); foreach ( $this->classname_updates as $updated_name => $action ) { if ( strlen( $updated_name ) === $class_name_length && 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) ) { $this->classname_updates[ $updated_name ] = self::ADD_CLASS; return true; } } $this->classname_updates[ $class_name ] = self::ADD_CLASS; return true; } /** * Removes a class name from the currently matched tag. * * @since 6.2.0 * * @param string $class_name The class name to remove. * @return bool Whether the class was set to be removed. 
*/ public function remove_class( $class_name ): bool { if ( self::STATE_MATCHED_TAG !== $this->parser_state || $this->is_closing_tag ) { return false; } if ( self::QUIRKS_MODE !== $this->compat_mode ) { $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; return true; } /* * Because class names are matched ASCII-case-insensitively in quirks mode, * this needs to see if a case variant of the given class name is already * enqueued and update that existing entry, if so. This picks the casing of * the first-provided class name for all lexical variations. */ $class_name_length = strlen( $class_name ); foreach ( $this->classname_updates as $updated_name => $action ) { if ( strlen( $updated_name ) === $class_name_length && 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) ) { $this->classname_updates[ $updated_name ] = self::REMOVE_CLASS; return true; } } $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; return true; } /** * Returns the string representation of the HTML Tag Processor. * * @since 6.2.0 * * @see WP_HTML_Tag_Processor::get_updated_html() * * @return string The processed HTML. */ public function __toString(): string { return $this->get_updated_html(); } /** * Returns the string representation of the HTML Tag Processor. * * @since 6.2.0 * @since 6.2.1 Shifts the internal cursor corresponding to the applied updates. * @since 6.4.0 No longer calls subclass method `next_tag()` after updating HTML. * * @return string The processed HTML. */ public function get_updated_html(): string { $requires_no_updating = 0 === count( $this->classname_updates ) && 0 === count( $this->lexical_updates ); /* * When there is nothing more to update and nothing has already been * updated, return the original document and avoid a string copy. */ if ( $requires_no_updating ) { return $this->html; } /* * Keep track of the position right before the current tag. 
This will * be necessary for reparsing the current tag after updating the HTML. */ $before_current_tag = $this->token_starts_at ?? 0; /* * 1. Apply the enqueued edits and update all the pointers to reflect those changes. */ $this->class_name_updates_to_attributes_updates(); $before_current_tag += $this->apply_attributes_updates( $before_current_tag ); /* * 2. Rewind to before the current tag and reparse to get updated attributes. * * At this point the internal cursor points to the end of the tag name. * Rewind before the tag name starts so that it's as if the cursor didn't * move; a call to `next_tag()` will reparse the recently-updated attributes * and additional calls to modify the attributes will apply at this same * location, but in order to avoid issues with subclasses that might add * behaviors to `next_tag()`, the internal methods should be called here * instead. * * It's important to note that in this specific place there will be no change * because the processor was already at a tag when this was called and it's * rewinding only to the beginning of this very tag before reprocessing it * and its attributes. * *

<p>Previous HTML<em>More HTML</em></p>

* ↑ │ back up by the length of the tag name plus the opening < * └←─┘ back up by strlen("em") + 1 ==> 3 */ $this->bytes_already_parsed = $before_current_tag; $this->base_class_next_token(); return $this->html; } /** * Parses tag query input into internal search criteria. * * @since 6.2.0 * * @param array|string|null $query { * Optional. Which tag name to find, having which class, etc. Default is to find any tag. * * @type string|null $tag_name Which tag to find, or `null` for "any tag." * @type int|null $match_offset Find the Nth tag matching all search criteria. * 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this class name to match. * @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g.
. * } */ private function parse_query( $query ) { if ( null !== $query && $query === $this->last_query ) { return; } $this->last_query = $query; $this->sought_tag_name = null; $this->sought_class_name = null; $this->sought_match_offset = 1; $this->stop_on_tag_closers = false; // A single string value means "find the tag of this name". if ( is_string( $query ) ) { $this->sought_tag_name = $query; return; } // An empty query parameter applies no restrictions on the search. if ( null === $query ) { return; } // If not using the string interface, an associative array is required. if ( ! is_array( $query ) ) { _doing_it_wrong( __METHOD__, __( 'The query argument must be an array or a tag name.' ), '6.2.0' ); return; } if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) { $this->sought_tag_name = $query['tag_name']; } if ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) { $this->sought_class_name = $query['class_name']; } if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) { $this->sought_match_offset = $query['match_offset']; } if ( isset( $query['tag_closers'] ) ) { $this->stop_on_tag_closers = 'visit' === $query['tag_closers']; } } /** * Checks whether a given tag and its attributes match the search criteria. * * @since 6.2.0 * * @return bool Whether the given tag and its attribute match the search criteria. */ private function matches(): bool { if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) { return false; } // Does the tag name match the requested tag name in a case-insensitive manner? if ( isset( $this->sought_tag_name ) && ( strlen( $this->sought_tag_name ) !== $this->tag_name_length || 0 !== substr_compare( $this->html, $this->sought_tag_name, $this->tag_name_starts_at, $this->tag_name_length, true ) ) ) { return false; } if ( null !== $this->sought_class_name && ! 
$this->has_class( $this->sought_class_name ) ) { return false; } return true; } /** * Gets DOCTYPE declaration info from a DOCTYPE token. * * DOCTYPE tokens may appear in many places in an HTML document. In most places, they are * simply ignored. The main parsing functions find the basic shape of DOCTYPE tokens but * do not perform detailed parsing. * * This method can be called to perform a full parse of the DOCTYPE token and retrieve * its information. * * @return WP_HTML_Doctype_Info|null The DOCTYPE declaration information or `null` if not * currently at a DOCTYPE node. */ public function get_doctype_info(): ?WP_HTML_Doctype_Info { if ( self::STATE_DOCTYPE !== $this->parser_state ) { return null; } return WP_HTML_Doctype_Info::from_doctype_token( substr( $this->html, $this->token_starts_at, $this->token_length ) ); } /** * Parser Ready State. * * Indicates that the parser is ready to run and waiting for a state transition. * It may not have started yet, or it may have just finished parsing a token and * is ready to find the next one. * * @since 6.5.0 * * @access private */ const STATE_READY = 'STATE_READY'; /** * Parser Complete State. * * Indicates that the parser has reached the end of the document and there is * nothing left to scan. It finished parsing the last token completely. * * @since 6.5.0 * * @access private */ const STATE_COMPLETE = 'STATE_COMPLETE'; /** * Parser Incomplete Input State. * * Indicates that the parser has reached the end of the document before finishing * a token. It started parsing a token but there is a possibility that the input * HTML document was truncated in the middle of a token. * * The parser is reset at the start of the incomplete token and has paused. There * is nothing more than can be scanned unless provided a more complete document. * * @since 6.5.0 * * @access private */ const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT'; /** * Parser Matched Tag State. 
* * Indicates that the parser has found an HTML tag and it's possible to get * the tag name and read or modify its attributes (if it's not a closing tag). * * @since 6.5.0 * * @access private */ const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG'; /** * Parser Text Node State. * * Indicates that the parser has found a text node and it's possible * to read and modify that text. * * @since 6.5.0 * * @access private */ const STATE_TEXT_NODE = 'STATE_TEXT_NODE'; /** * Parser CDATA Node State. * * Indicates that the parser has found a CDATA node and it's possible * to read and modify its modifiable text. Note that in HTML there are * no CDATA nodes outside of foreign content (SVG and MathML). Outside * of foreign content, they are treated as HTML comments. * * @since 6.5.0 * * @access private */ const STATE_CDATA_NODE = 'STATE_CDATA_NODE'; /** * Indicates that the parser has found an HTML comment and it's * possible to read and modify its modifiable text. * * @since 6.5.0 * * @access private */ const STATE_COMMENT = 'STATE_COMMENT'; /** * Indicates that the parser has found a DOCTYPE node and it's * possible to read its DOCTYPE information via `get_doctype_info()`. * * @since 6.5.0 * * @access private */ const STATE_DOCTYPE = 'STATE_DOCTYPE'; /** * Indicates that the parser has found an empty tag closer ``. * * Note that in HTML there are no empty tag closers, and they * are ignored. Nonetheless, the Tag Processor still * recognizes them as they appear in the HTML stream. * * These were historically discussed as a "presumptuous tag * closer," which would close the nearest open tag, but were * dismissed in favor of explicitly-closing tags. * * @since 6.5.0 * * @access private */ const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG'; /** * Indicates that the parser has found a "funky comment" * and it's possible to read and modify its modifiable text. * * Example: * * * * * * Funky comments are tag closers with invalid tag names. 
Note * that in HTML these are turn into bogus comments. Nonetheless, * the Tag Processor recognizes them in a stream of HTML and * exposes them for inspection and modification. * * @since 6.5.0 * * @access private */ const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY'; /** * Indicates that a comment was created when encountering abruptly-closed HTML comment. * * Example: * * <!--> * <!---> * * @since 6.5.0 */ const COMMENT_AS_ABRUPTLY_CLOSED_COMMENT = 'COMMENT_AS_ABRUPTLY_CLOSED_COMMENT'; /** * Indicates that a comment would be parsed as a CDATA node, * were HTML to allow CDATA nodes outside of foreign content. * * Example: * * <![CDATA[This is a CDATA node.]]> * * This is an HTML comment, but it looks like a CDATA node. * * @since 6.5.0 */ const COMMENT_AS_CDATA_LOOKALIKE = 'COMMENT_AS_CDATA_LOOKALIKE'; /** * Indicates that a comment was created when encountering * normative HTML comment syntax. * * Example: * * <!-- this is a comment --> * * @since 6.5.0 */ const COMMENT_AS_HTML_COMMENT = 'COMMENT_AS_HTML_COMMENT'; /** * Indicates that a comment would be parsed as a Processing * Instruction node, were they to exist within HTML. * * Example: * * <?wp __( 'Like' ) ?> * * This is an HTML comment, but it looks like a Processing Instruction node. * * @since 6.5.0 */ const COMMENT_AS_PI_NODE_LOOKALIKE = 'COMMENT_AS_PI_NODE_LOOKALIKE'; /** * Indicates that a comment was created when encountering invalid * HTML input, a so-called "bogus comment." * * Example: * * * * * @since 6.5.0 */ const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML'; /** * No-quirks mode document compatibility mode. * * > In no-quirks mode, the behavior is (hopefully) the desired behavior * > described by the modern HTML and CSS specifications. * * @see self::$compat_mode * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode * * @since 6.7.0 * * @var string */ const NO_QUIRKS_MODE = 'no-quirks-mode'; /** * Quirks mode document compatibility mode. * * > In quirks mode, layout emulates behavior in Navigator 4 and Internet * > Explorer 5.
This is essential in order to support websites that were * > built before the widespread adoption of web standards. * * @see self::$compat_mode * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode * * @since 6.7.0 * * @var string */ const QUIRKS_MODE = 'quirks-mode'; /** * Indicates that a span of text may contain any combination of significant * kinds of characters: NULL bytes, whitespace, and others. * * @see self::$text_node_classification * @see self::subdivide_text_appropriately * * @since 6.7.0 */ const TEXT_IS_GENERIC = 'TEXT_IS_GENERIC'; /** * Indicates that a span of text comprises a sequence only of NULL bytes. * * @see self::$text_node_classification * @see self::subdivide_text_appropriately * * @since 6.7.0 */ const TEXT_IS_NULL_SEQUENCE = 'TEXT_IS_NULL_SEQUENCE'; /** * Indicates that a span of decoded text comprises only whitespace. * * @see self::$text_node_classification * @see self::subdivide_text_appropriately * * @since 6.7.0 */ const TEXT_IS_WHITESPACE = 'TEXT_IS_WHITESPACE'; } class-wp-html-active-formatting-elements.php000064400000016140147333266670015226 0ustar00 Initially, the list of active formatting elements is empty. * > It is used to handle mis-nested formatting element tags. * > * > The list contains elements in the formatting category, and markers. * > The markers are inserted when entering applet, object, marquee, * > template, td, th, and caption elements, and are used to prevent * > formatting from "leaking" into applet, object, marquee, template, * > td, th, and caption elements. * > * > In addition, each element in the list of active formatting elements * > is associated with the token for which it was created, so that * > further elements can be created for that token if necessary. 
* * @since 6.4.0 * * @access private * * @see https://html.spec.whatwg.org/#list-of-active-formatting-elements * @see WP_HTML_Processor */ class WP_HTML_Active_Formatting_Elements { /** * Holds the stack of active formatting element references. * * @since 6.4.0 * * @var WP_HTML_Token[] */ private $stack = array(); /** * Reports if a specific node is in the stack of active formatting elements. * * @since 6.4.0 * * @param WP_HTML_Token $token Look for this node in the stack. * @return bool Whether the referenced node is in the stack of active formatting elements. */ public function contains_node( WP_HTML_Token $token ) { foreach ( $this->walk_up() as $item ) { if ( $token->bookmark_name === $item->bookmark_name ) { return true; } } return false; } /** * Returns how many nodes are currently in the stack of active formatting elements. * * @since 6.4.0 * * @return int How many node are in the stack of active formatting elements. */ public function count() { return count( $this->stack ); } /** * Returns the node at the end of the stack of active formatting elements, * if one exists. If the stack is empty, returns null. * * @since 6.4.0 * * @return WP_HTML_Token|null Last node in the stack of active formatting elements, if one exists, otherwise null. */ public function current_node() { $current_node = end( $this->stack ); return $current_node ? $current_node : null; } /** * Inserts a "marker" at the end of the list of active formatting elements. * * > The markers are inserted when entering applet, object, marquee, * > template, td, th, and caption elements, and are used to prevent * > formatting from "leaking" into applet, object, marquee, template, * > td, th, and caption elements. * * @see https://html.spec.whatwg.org/#concept-parser-marker * * @since 6.7.0 */ public function insert_marker(): void { $this->push( new WP_HTML_Token( null, 'marker', false ) ); } /** * Pushes a node onto the stack of active formatting elements. 
* * @since 6.4.0 * * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements * * @param WP_HTML_Token $token Push this node onto the stack. */ public function push( WP_HTML_Token $token ) { /* * > If there are already three elements in the list of active formatting elements after the last marker, * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and * > attributes as element, then remove the earliest such element from the list of active formatting * > elements. For these purposes, the attributes must be compared as they were when the elements were * > created by the parser; two elements have the same attributes if all their parsed attributes can be * > paired such that the two attributes in each pair have identical names, namespaces, and values * > (the order of the attributes does not matter). * * @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack. */ // > Add element to the list of active formatting elements. $this->stack[] = $token; } /** * Removes a node from the stack of active formatting elements. * * @since 6.4.0 * * @param WP_HTML_Token $token Remove this node from the stack, if it's there already. * @return bool Whether the node was found and removed from the stack of active formatting elements. */ public function remove_node( WP_HTML_Token $token ) { foreach ( $this->walk_up() as $position_from_end => $item ) { if ( $token->bookmark_name !== $item->bookmark_name ) { continue; } $position_from_start = $this->count() - $position_from_end - 1; array_splice( $this->stack, $position_from_start, 1 ); return true; } return false; } /** * Steps through the stack of active formatting elements, starting with the * top element (added first) and walking downwards to the one added last. * * This generator function is designed to be used inside a "foreach" loop. 
* * Example: * * $html = 'We are here'; * foreach ( $stack->walk_down() as $node ) { * echo "{$node->node_name} -> "; * } * > EM -> STRONG -> A -> * * To start with the most-recently added element and walk towards the top, * see WP_HTML_Active_Formatting_Elements::walk_up(). * * @since 6.4.0 */ public function walk_down() { $count = count( $this->stack ); for ( $i = 0; $i < $count; $i++ ) { yield $this->stack[ $i ]; } } /** * Steps through the stack of active formatting elements, starting with the * bottom element (added last) and walking upwards to the one added first. * * This generator function is designed to be used inside a "foreach" loop. * * Example: * * $html = 'We are here'; * foreach ( $stack->walk_up() as $node ) { * echo "{$node->node_name} -> "; * } * > A -> STRONG -> EM -> * * To start with the first added element and walk towards the bottom, * see WP_HTML_Active_Formatting_Elements::walk_down(). * * @since 6.4.0 */ public function walk_up() { for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) { yield $this->stack[ $i ]; } } /** * Clears the list of active formatting elements up to the last marker. * * > When the steps below require the UA to clear the list of active formatting elements up to * > the last marker, the UA must perform the following steps: * > * > 1. Let entry be the last (most recently added) entry in the list of active * > formatting elements. * > 2. Remove entry from the list of active formatting elements. * > 3. If entry was a marker, then stop the algorithm at this point. * > The list has been cleared up to the last marker. * > 4. Go to step 1. 
* * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker * * @since 6.7.0 */ public function clear_up_to_last_marker(): void { foreach ( $this->walk_up() as $item ) { array_pop( $this->stack ); if ( 'marker' === $item->node_name ) { break; } } } } class-wp-html-decoder.php000064400000040464147333266670011404 0ustar00= $length ) { return null; } if ( '&' !== $text[ $at ] ) { return null; } /* * Numeric character references. * * When truncated, these will encode the code point found by parsing the * digits that are available. For example, when `🅰` is truncated * to `DZ` it will encode `DZ`. It does not: * - know how to parse the original `🅰`. * - fail to parse and return plaintext `DZ`. * - fail to parse and return the replacement character `�` */ if ( '#' === $text[ $at + 1 ] ) { if ( $at + 2 >= $length ) { return null; } /** Tracks inner parsing within the numeric character reference. */ $digits_at = $at + 2; if ( 'x' === $text[ $digits_at ] || 'X' === $text[ $digits_at ] ) { $numeric_base = 16; $numeric_digits = '0123456789abcdefABCDEF'; $max_digits = 6; // 􏿿 ++$digits_at; } else { $numeric_base = 10; $numeric_digits = '0123456789'; $max_digits = 7; // 􏿿 } // Cannot encode invalid Unicode code points. Max is to U+10FFFF. $zero_count = strspn( $text, '0', $digits_at ); $digit_count = strspn( $text, $numeric_digits, $digits_at + $zero_count ); $after_digits = $digits_at + $zero_count + $digit_count; $has_semicolon = $after_digits < $length && ';' === $text[ $after_digits ]; $end_of_span = $has_semicolon ? $after_digits + 1 : $after_digits; // `&#` or `&#x` without digits returns into plaintext. if ( 0 === $digit_count && 0 === $zero_count ) { return null; } // Whereas `&#` and only zeros is invalid. if ( 0 === $digit_count ) { $match_byte_length = $end_of_span - $at; return '�'; } // If there are too many digits then it's not worth parsing. It's invalid. 
if ( $digit_count > $max_digits ) { $match_byte_length = $end_of_span - $at; return '�'; } $digits = substr( $text, $digits_at + $zero_count, $digit_count ); $code_point = intval( $digits, $numeric_base ); /* * Noncharacters, 0x0D, and non-ASCII-whitespace control characters. * * > A noncharacter is a code point that is in the range U+FDD0 to U+FDEF, * > inclusive, or U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, * > U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, * > U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, * > U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE, * > U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, or U+10FFFF. * * A C0 control is a code point that is in the range of U+00 to U+1F, * but ASCII whitespace includes U+09, U+0A, U+0C, and U+0D. * * These characters are invalid but still decode as any valid character. * This comment is here to note and explain why there's no check to * remove these characters or replace them. * * @see https://infra.spec.whatwg.org/#noncharacter */ /* * Code points in the C1 controls area need to be remapped as if they * were stored in Windows-1252. Note! This transformation only happens * for numeric character references. The raw code points in the byte * stream are not translated. * * > If the number is one of the numbers in the first column of * > the following table, then find the row with that number in * > the first column, and set the character reference code to * > the number in the second column of that row. */ if ( $code_point >= 0x80 && $code_point <= 0x9F ) { $windows_1252_mapping = array( 0x20AC, // 0x80 -> EURO SIGN (€). 0x81, // 0x81 -> (no change). 0x201A, // 0x82 -> SINGLE LOW-9 QUOTATION MARK (‚). 0x0192, // 0x83 -> LATIN SMALL LETTER F WITH HOOK (ƒ). 0x201E, // 0x84 -> DOUBLE LOW-9 QUOTATION MARK („). 0x2026, // 0x85 -> HORIZONTAL ELLIPSIS (…). 0x2020, // 0x86 -> DAGGER (†). 0x2021, // 0x87 -> DOUBLE DAGGER (‡). 
0x02C6, // 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ). 0x2030, // 0x89 -> PER MILLE SIGN (‰). 0x0160, // 0x8A -> LATIN CAPITAL LETTER S WITH CARON (Š). 0x2039, // 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK (‹). 0x0152, // 0x8C -> LATIN CAPITAL LIGATURE OE (Œ). 0x8D, // 0x8D -> (no change). 0x017D, // 0x8E -> LATIN CAPITAL LETTER Z WITH CARON (Ž). 0x8F, // 0x8F -> (no change). 0x90, // 0x90 -> (no change). 0x2018, // 0x91 -> LEFT SINGLE QUOTATION MARK (‘). 0x2019, // 0x92 -> RIGHT SINGLE QUOTATION MARK (’). 0x201C, // 0x93 -> LEFT DOUBLE QUOTATION MARK (“). 0x201D, // 0x94 -> RIGHT DOUBLE QUOTATION MARK (”). 0x2022, // 0x95 -> BULLET (•). 0x2013, // 0x96 -> EN DASH (–). 0x2014, // 0x97 -> EM DASH (—). 0x02DC, // 0x98 -> SMALL TILDE (˜). 0x2122, // 0x99 -> TRADE MARK SIGN (™). 0x0161, // 0x9A -> LATIN SMALL LETTER S WITH CARON (š). 0x203A, // 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (›). 0x0153, // 0x9C -> LATIN SMALL LIGATURE OE (œ). 0x9D, // 0x9D -> (no change). 0x017E, // 0x9E -> LATIN SMALL LETTER Z WITH CARON (ž). 0x0178, // 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ). ); $code_point = $windows_1252_mapping[ $code_point - 0x80 ]; } $match_byte_length = $end_of_span - $at; return self::code_point_to_utf8_bytes( $code_point ); } /** Tracks inner parsing within the named character reference. */ $name_at = $at + 1; // Minimum named character reference is two characters. E.g. `GT`. if ( $name_at + 2 > $length ) { return null; } $name_length = 0; $replacement = $html5_named_character_references->read_token( $text, $name_at, $name_length ); if ( false === $replacement ) { return null; } $after_name = $name_at + $name_length; // If the match ended with a semicolon then it should always be decoded. if ( ';' === $text[ $name_at + $name_length - 1 ] ) { $match_byte_length = $after_name - $at; return $replacement; } /* * At this point though there's a match for an entry in the named * character reference table but the match doesn't end in `;`. 
/**
 * Converts a Unicode code point number into its UTF-8 byte sequence.
 *
 * The output is one byte for code points up to U+007F, two bytes up to
 * U+07FF, three bytes up to U+FFFF, and four bytes up to U+10FFFF.
 * Anything which cannot be encoded is replaced by the Unicode Replacement
 * Character U+FFFD `�`: zero and negative numbers, either half of a
 * surrogate pair (U+D800 through U+DFFF), and numbers above U+10FFFF.
 *
 * Example:
 *
 *     '🅰' === WP_HTML_Decoder::code_point_to_utf8_bytes( 0x1f170 );
 *
 *     // A lone surrogate half is not a valid code point.
 *     '�' === WP_HTML_Decoder::code_point_to_utf8_bytes( 0xd83c );
 *
 * @since 6.6.0
 *
 * @see https://www.rfc-editor.org/rfc/rfc3629 For the UTF-8 standard.
 *
 * @param int $code_point Which code point to convert.
 * @return string Converted code point, or `�` if invalid.
 */
public static function code_point_to_utf8_bytes( $code_point ): string {
	$is_surrogate_half = $code_point >= 0xD800 && $code_point <= 0xDFFF;

	// Reject everything that has no UTF-8 representation before encoding.
	if ( $code_point <= 0 || $is_surrogate_half || $code_point > 0x10FFFF ) {
		return '�';
	}

	// Single byte: the code point is its own encoding.
	if ( $code_point <= 0x7F ) {
		return chr( $code_point );
	}

	// Every multi-byte form ends with a continuation byte holding the low six bits.
	$last_byte = chr( 0x80 | ( $code_point & 0x3F ) );

	// Two bytes: 110xxxxx 10xxxxxx.
	if ( $code_point <= 0x7FF ) {
		return chr( 0xC0 | ( $code_point >> 6 ) ) . $last_byte;
	}

	// The next six bits up are shared by the three- and four-byte forms.
	$middle_byte = chr( 0x80 | ( ( $code_point >> 6 ) & 0x3F ) );

	// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
	if ( $code_point <= 0xFFFF ) {
		return chr( 0xE0 | ( $code_point >> 12 ) ) . $middle_byte . $last_byte;
	}

	// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. The guard above caps the value at U+10FFFF.
	$lead_byte   = chr( 0xF0 | ( $code_point >> 18 ) );
	$second_byte = chr( 0x80 | ( ( $code_point >> 12 ) & 0x3F ) );

	return $lead_byte . $second_byte . $middle_byte . $last_byte;
}
x00ZH\x00Za\x00Zc\x00Zd\x00Ze\x00Zf\x00Zo\x00Zs\x00aa\x00ab\x00ac\x00ae\x00af\x00ag\x00al\x00am\x00an\x00ao\x00ap\x00ar\x00as\x00at\x00au\x00aw\x00bN\x00ba\x00bb\x00bc\x00bd\x00be\x00bf\x00bi\x00bk\x00bl\x00bn\x00bo\x00bp\x00br\x00bs\x00bu\x00ca\x00cc\x00cd\x00ce\x00cf\x00ch\x00ci\x00cl\x00co\x00cr\x00cs\x00ct\x00cu\x00cw\x00cy\x00dA\x00dH\x00da\x00db\x00dc\x00dd\x00de\x00df\x00dh\x00di\x00dj\x00dl\x00do\x00dr\x00ds\x00dt\x00du\x00dw\x00dz\x00eD\x00ea\x00ec\x00ed\x00ee\x00ef\x00eg\x00el\x00em\x00en\x00eo\x00ep\x00eq\x00er\x00es\x00et\x00eu\x00ex\x00fa\x00fc\x00fe\x00ff\x00fi\x00fj\x00fl\x00fn\x00fo\x00fp\x00fr\x00fs\x00gE\x00ga\x00gb\x00gc\x00gd\x00ge\x00gf\x00gg\x00gi\x00gj\x00gl\x00gn\x00go\x00gr\x00gs\x00gt\x00gv\x00hA\x00ha\x00hb\x00hc\x00he\x00hf\x00hk\x00ho\x00hs\x00hy\x00ia\x00ic\x00ie\x00if\x00ig\x00ii\x00ij\x00im\x00in\x00io\x00ip\x00iq\x00is\x00it\x00iu\x00jc\x00jf\x00jm\x00jo\x00js\x00ju\x00ka\x00kc\x00kf\x00kg\x00kh\x00kj\x00ko\x00ks\x00lA\x00lB\x00lE\x00lH\x00la\x00lb\x00lc\x00ld\x00le\x00lf\x00lg\x00lh\x00lj\x00ll\x00lm\x00ln\x00lo\x00lp\x00lr\x00ls\x00lt\x00lu\x00lv\x00mD\x00ma\x00mc\x00md\x00me\x00mf\x00mh\x00mi\x00ml\x00mn\x00mo\x00mp\x00ms\x00mu\x00nG\x00nL\x00nR\x00nV\x00na\x00nb\x00nc\x00nd\x00ne\x00nf\x00ng\x00nh\x00ni\x00nj\x00nl\x00nm\x00no\x00np\x00nr\x00ns\x00nt\x00nu\x00nv\x00nw\x00oS\x00oa\x00oc\x00od\x00oe\x00of\x00og\x00oh\x00oi\x00ol\x00om\x00oo\x00op\x00or\x00os\x00ot\x00ou\x00ov\x00pa\x00pc\x00pe\x00pf\x00ph\x00pi\x00pl\x00pm\x00po\x00pr\x00ps\x00pu\x00qf\x00qi\x00qo\x00qp\x00qs\x00qu\x00rA\x00rB\x00rH\x00ra\x00rb\x00rc\x00rd\x00re\x00rf\x00rh\x00ri\x00rl\x00rm\x00rn\x00ro\x00rp\x00rr\x00rs\x00rt\x00ru\x00rx\x00sa\x00sb\x00sc\x00sd\x00se\x00sf\x00sh\x00si\x00sl\x00sm\x00so\x00sp\x00sq\x00sr\x00ss\x00st\x00su\x00sw\x00sz\x00ta\x00tb\x00tc\x00td\x00te\x00tf\x00th\x00ti\x00to\x00tp\x00tr\x00ts\x00tw\x00uA\x00uH\x00ua\x00ub\x00uc\x00ud\x00uf\x00ug\x00uh\x00ul\x00um\x00uo\x00up\x00ur\x00us\x00ut\x00uu\x00uw\x00vA\x00vB\x00vD\x00va\x00vc\x0
0vd\x00ve\x00vf\x00vl\x00vn\x00vo\x00vp\x00vr\x00vs\x00vz\x00wc\x00we\x00wf\x00wo\x00wp\x00wr\x00ws\x00xc\x00xd\x00xf\x00xh\x00xi\x00xl\x00xm\x00xn\x00xo\x00xr\x00xs\x00xu\x00xv\x00xw\x00ya\x00yc\x00ye\x00yf\x00yi\x00yo\x00ys\x00yu\x00za\x00zc\x00zd\x00ze\x00zf\x00zh\x00zi\x00zo\x00zs\x00zw\x00", "large_words" => array( // AElig;[Æ] AElig[Æ]. "\x04lig;\x02Æ\x03lig\x02Æ", // AMP;[&] AMP[&]. "\x02P;\x01&\x01P\x01&", // Aacute;[Á] Aacute[Á]. "\x05cute;\x02Á\x04cute\x02Á", // Abreve;[Ă]. "\x05reve;\x02Ă", // Acirc;[Â] Acirc[Â] Acy;[А]. "\x04irc;\x02Â\x03irc\x02Â\x02y;\x02А", // Afr;[𝔄]. "\x02r;\x04𝔄", // Agrave;[À] Agrave[À]. "\x05rave;\x02À\x04rave\x02À", // Alpha;[Α]. "\x04pha;\x02Α", // Amacr;[Ā]. "\x04acr;\x02Ā", // And;[⩓]. "\x02d;\x03⩓", // Aogon;[Ą] Aopf;[𝔸]. "\x04gon;\x02Ą\x03pf;\x04𝔸", // ApplyFunction;[⁡]. "\x0cplyFunction;\x03⁡", // Aring;[Å] Aring[Å]. "\x04ing;\x02Å\x03ing\x02Å", // Assign;[≔] Ascr;[𝒜]. "\x05sign;\x03≔\x03cr;\x04𝒜", // Atilde;[Ã] Atilde[Ã]. "\x05ilde;\x02Ã\x04ilde\x02Ã", // Auml;[Ä] Auml[Ä]. "\x03ml;\x02Ä\x02ml\x02Ä", // Backslash;[∖] Barwed;[⌆] Barv;[⫧]. "\x08ckslash;\x03∖\x05rwed;\x03⌆\x03rv;\x03⫧", // Bcy;[Б]. "\x02y;\x02Б", // Bernoullis;[ℬ] Because;[∵] Beta;[Β]. "\x09rnoullis;\x03ℬ\x06cause;\x03∵\x03ta;\x02Β", // Bfr;[𝔅]. "\x02r;\x04𝔅", // Bopf;[𝔹]. "\x03pf;\x04𝔹", // Breve;[˘]. "\x04eve;\x02˘", // Bscr;[ℬ]. "\x03cr;\x03ℬ", // Bumpeq;[≎]. "\x05mpeq;\x03≎", // CHcy;[Ч]. "\x03cy;\x02Ч", // COPY;[©] COPY[©]. "\x03PY;\x02©\x02PY\x02©", // CapitalDifferentialD;[ⅅ] Cayleys;[ℭ] Cacute;[Ć] Cap;[⋒]. "\x13pitalDifferentialD;\x03ⅅ\x06yleys;\x03ℭ\x05cute;\x02Ć\x02p;\x03⋒", // Cconint;[∰] Ccaron;[Č] Ccedil;[Ç] Ccedil[Ç] Ccirc;[Ĉ]. "\x06onint;\x03∰\x05aron;\x02Č\x05edil;\x02Ç\x04edil\x02Ç\x04irc;\x02Ĉ", // Cdot;[Ċ]. "\x03ot;\x02Ċ", // CenterDot;[·] Cedilla;[¸]. "\x08nterDot;\x02·\x06dilla;\x02¸", // Cfr;[ℭ]. "\x02r;\x03ℭ", // Chi;[Χ]. "\x02i;\x02Χ", // CircleMinus;[⊖] CircleTimes;[⊗] CirclePlus;[⊕] CircleDot;[⊙]. 
"\x0arcleMinus;\x03⊖\x0arcleTimes;\x03⊗\x09rclePlus;\x03⊕\x08rcleDot;\x03⊙", // ClockwiseContourIntegral;[∲] CloseCurlyDoubleQuote;[”] CloseCurlyQuote;[’]. "\x17ockwiseContourIntegral;\x03∲\x14oseCurlyDoubleQuote;\x03”\x0eoseCurlyQuote;\x03’", // CounterClockwiseContourIntegral;[∳] ContourIntegral;[∮] Congruent;[≡] Coproduct;[∐] Colone;[⩴] Conint;[∯] Colon;[∷] Copf;[ℂ]. "\x1eunterClockwiseContourIntegral;\x03∳\x0entourIntegral;\x03∮\x08ngruent;\x03≡\x08product;\x03∐\x05lone;\x03⩴\x05nint;\x03∯\x04lon;\x03∷\x03pf;\x03ℂ", // Cross;[⨯]. "\x04oss;\x03⨯", // Cscr;[𝒞]. "\x03cr;\x04𝒞", // CupCap;[≍] Cup;[⋓]. "\x05pCap;\x03≍\x02p;\x03⋓", // DDotrahd;[⤑] DD;[ⅅ]. "\x07otrahd;\x03⤑\x01;\x03ⅅ", // DJcy;[Ђ]. "\x03cy;\x02Ђ", // DScy;[Ѕ]. "\x03cy;\x02Ѕ", // DZcy;[Џ]. "\x03cy;\x02Џ", // Dagger;[‡] Dashv;[⫤] Darr;[↡]. "\x05gger;\x03‡\x04shv;\x03⫤\x03rr;\x03↡", // Dcaron;[Ď] Dcy;[Д]. "\x05aron;\x02Ď\x02y;\x02Д", // Delta;[Δ] Del;[∇]. "\x04lta;\x02Δ\x02l;\x03∇", // Dfr;[𝔇]. "\x02r;\x04𝔇", // DiacriticalDoubleAcute;[˝] DiacriticalAcute;[´] DiacriticalGrave;[`] DiacriticalTilde;[˜] DiacriticalDot;[˙] DifferentialD;[ⅆ] Diamond;[⋄]. "\x15acriticalDoubleAcute;\x02˝\x0facriticalAcute;\x02´\x0facriticalGrave;\x01`\x0facriticalTilde;\x02˜\x0dacriticalDot;\x02˙\x0cfferentialD;\x03ⅆ\x06amond;\x03⋄", // DoubleLongLeftRightArrow;[⟺] DoubleContourIntegral;[∯] DoubleLeftRightArrow;[⇔] DoubleLongRightArrow;[⟹] DoubleLongLeftArrow;[⟸] DownLeftRightVector;[⥐] DownRightTeeVector;[⥟] DownRightVectorBar;[⥗] DoubleUpDownArrow;[⇕] DoubleVerticalBar;[∥] DownLeftTeeVector;[⥞] DownLeftVectorBar;[⥖] DoubleRightArrow;[⇒] DownArrowUpArrow;[⇵] DoubleDownArrow;[⇓] DoubleLeftArrow;[⇐] DownRightVector;[⇁] DoubleRightTee;[⊨] DownLeftVector;[↽] DoubleLeftTee;[⫤] DoubleUpArrow;[⇑] DownArrowBar;[⤓] DownTeeArrow;[↧] DoubleDot;[¨] DownArrow;[↓] DownBreve;[̑] Downarrow;[⇓] DotEqual;[≐] DownTee;[⊤] DotDot;[⃜] Dopf;[𝔻] Dot;[¨]. 
"\x17ubleLongLeftRightArrow;\x03⟺\x14ubleContourIntegral;\x03∯\x13ubleLeftRightArrow;\x03⇔\x13ubleLongRightArrow;\x03⟹\x12ubleLongLeftArrow;\x03⟸\x12wnLeftRightVector;\x03⥐\x11wnRightTeeVector;\x03⥟\x11wnRightVectorBar;\x03⥗\x10ubleUpDownArrow;\x03⇕\x10ubleVerticalBar;\x03∥\x10wnLeftTeeVector;\x03⥞\x10wnLeftVectorBar;\x03⥖\x0fubleRightArrow;\x03⇒\x0fwnArrowUpArrow;\x03⇵\x0eubleDownArrow;\x03⇓\x0eubleLeftArrow;\x03⇐\x0ewnRightVector;\x03⇁\x0dubleRightTee;\x03⊨\x0dwnLeftVector;\x03↽\x0cubleLeftTee;\x03⫤\x0cubleUpArrow;\x03⇑\x0bwnArrowBar;\x03⤓\x0bwnTeeArrow;\x03↧\x08ubleDot;\x02¨\x08wnArrow;\x03↓\x08wnBreve;\x02̑\x08wnarrow;\x03⇓\x07tEqual;\x03≐\x06wnTee;\x03⊤\x05tDot;\x03⃜\x03pf;\x04𝔻\x02t;\x02¨", // Dstrok;[Đ] Dscr;[𝒟]. "\x05trok;\x02Đ\x03cr;\x04𝒟", // ENG;[Ŋ]. "\x02G;\x02Ŋ", // ETH;[Ð] ETH[Ð]. "\x02H;\x02Ð\x01H\x02Ð", // Eacute;[É] Eacute[É]. "\x05cute;\x02É\x04cute\x02É", // Ecaron;[Ě] Ecirc;[Ê] Ecirc[Ê] Ecy;[Э]. "\x05aron;\x02Ě\x04irc;\x02Ê\x03irc\x02Ê\x02y;\x02Э", // Edot;[Ė]. "\x03ot;\x02Ė", // Efr;[𝔈]. "\x02r;\x04𝔈", // Egrave;[È] Egrave[È]. "\x05rave;\x02È\x04rave\x02È", // Element;[∈]. "\x06ement;\x03∈", // EmptyVerySmallSquare;[▫] EmptySmallSquare;[◻] Emacr;[Ē]. "\x13ptyVerySmallSquare;\x03▫\x0fptySmallSquare;\x03◻\x04acr;\x02Ē", // Eogon;[Ę] Eopf;[𝔼]. "\x04gon;\x02Ę\x03pf;\x04𝔼", // Epsilon;[Ε]. "\x06silon;\x02Ε", // Equilibrium;[⇌] EqualTilde;[≂] Equal;[⩵]. "\x0auilibrium;\x03⇌\x09ualTilde;\x03≂\x04ual;\x03⩵", // Escr;[ℰ] Esim;[⩳]. "\x03cr;\x03ℰ\x03im;\x03⩳", // Eta;[Η]. "\x02a;\x02Η", // Euml;[Ë] Euml[Ë]. "\x03ml;\x02Ë\x02ml\x02Ë", // ExponentialE;[ⅇ] Exists;[∃]. "\x0bponentialE;\x03ⅇ\x05ists;\x03∃", // Fcy;[Ф]. "\x02y;\x02Ф", // Ffr;[𝔉]. "\x02r;\x04𝔉", // FilledVerySmallSquare;[▪] FilledSmallSquare;[◼]. "\x14lledVerySmallSquare;\x03▪\x10lledSmallSquare;\x03◼", // Fouriertrf;[ℱ] ForAll;[∀] Fopf;[𝔽]. "\x09uriertrf;\x03ℱ\x05rAll;\x03∀\x03pf;\x04𝔽", // Fscr;[ℱ]. "\x03cr;\x03ℱ", // GJcy;[Ѓ]. "\x03cy;\x02Ѓ", // GT;[>]. "\x01;\x01>", // Gammad;[Ϝ] Gamma;[Γ]. 
"\x05mmad;\x02Ϝ\x04mma;\x02Γ", // Gbreve;[Ğ]. "\x05reve;\x02Ğ", // Gcedil;[Ģ] Gcirc;[Ĝ] Gcy;[Г]. "\x05edil;\x02Ģ\x04irc;\x02Ĝ\x02y;\x02Г", // Gdot;[Ġ]. "\x03ot;\x02Ġ", // Gfr;[𝔊]. "\x02r;\x04𝔊", // Gg;[⋙]. "\x01;\x03⋙", // Gopf;[𝔾]. "\x03pf;\x04𝔾", // GreaterSlantEqual;[⩾] GreaterEqualLess;[⋛] GreaterFullEqual;[≧] GreaterGreater;[⪢] GreaterEqual;[≥] GreaterTilde;[≳] GreaterLess;[≷]. "\x10eaterSlantEqual;\x03⩾\x0featerEqualLess;\x03⋛\x0featerFullEqual;\x03≧\x0deaterGreater;\x03⪢\x0beaterEqual;\x03≥\x0beaterTilde;\x03≳\x0aeaterLess;\x03≷", // Gscr;[𝒢]. "\x03cr;\x04𝒢", // Gt;[≫]. "\x01;\x03≫", // HARDcy;[Ъ]. "\x05RDcy;\x02Ъ", // Hacek;[ˇ] Hat;[^]. "\x04cek;\x02ˇ\x02t;\x01^", // Hcirc;[Ĥ]. "\x04irc;\x02Ĥ", // Hfr;[ℌ]. "\x02r;\x03ℌ", // HilbertSpace;[ℋ]. "\x0blbertSpace;\x03ℋ", // HorizontalLine;[─] Hopf;[ℍ]. "\x0drizontalLine;\x03─\x03pf;\x03ℍ", // Hstrok;[Ħ] Hscr;[ℋ]. "\x05trok;\x02Ħ\x03cr;\x03ℋ", // HumpDownHump;[≎] HumpEqual;[≏]. "\x0bmpDownHump;\x03≎\x08mpEqual;\x03≏", // IEcy;[Е]. "\x03cy;\x02Е", // IJlig;[IJ]. "\x04lig;\x02IJ", // IOcy;[Ё]. "\x03cy;\x02Ё", // Iacute;[Í] Iacute[Í]. "\x05cute;\x02Í\x04cute\x02Í", // Icirc;[Î] Icirc[Î] Icy;[И]. "\x04irc;\x02Î\x03irc\x02Î\x02y;\x02И", // Idot;[İ]. "\x03ot;\x02İ", // Ifr;[ℑ]. "\x02r;\x03ℑ", // Igrave;[Ì] Igrave[Ì]. "\x05rave;\x02Ì\x04rave\x02Ì", // ImaginaryI;[ⅈ] Implies;[⇒] Imacr;[Ī] Im;[ℑ]. "\x09aginaryI;\x03ⅈ\x06plies;\x03⇒\x04acr;\x02Ī\x01;\x03ℑ", // InvisibleComma;[⁣] InvisibleTimes;[⁢] Intersection;[⋂] Integral;[∫] Int;[∬]. "\x0dvisibleComma;\x03⁣\x0dvisibleTimes;\x03⁢\x0btersection;\x03⋂\x07tegral;\x03∫\x02t;\x03∬", // Iogon;[Į] Iopf;[𝕀] Iota;[Ι]. "\x04gon;\x02Į\x03pf;\x04𝕀\x03ta;\x02Ι", // Iscr;[ℐ]. "\x03cr;\x03ℐ", // Itilde;[Ĩ]. "\x05ilde;\x02Ĩ", // Iukcy;[І] Iuml;[Ï] Iuml[Ï]. "\x04kcy;\x02І\x03ml;\x02Ï\x02ml\x02Ï", // Jcirc;[Ĵ] Jcy;[Й]. "\x04irc;\x02Ĵ\x02y;\x02Й", // Jfr;[𝔍]. "\x02r;\x04𝔍", // Jopf;[𝕁]. "\x03pf;\x04𝕁", // Jsercy;[Ј] Jscr;[𝒥]. "\x05ercy;\x02Ј\x03cr;\x04𝒥", // Jukcy;[Є]. 
"\x04kcy;\x02Є", // KHcy;[Х]. "\x03cy;\x02Х", // KJcy;[Ќ]. "\x03cy;\x02Ќ", // Kappa;[Κ]. "\x04ppa;\x02Κ", // Kcedil;[Ķ] Kcy;[К]. "\x05edil;\x02Ķ\x02y;\x02К", // Kfr;[𝔎]. "\x02r;\x04𝔎", // Kopf;[𝕂]. "\x03pf;\x04𝕂", // Kscr;[𝒦]. "\x03cr;\x04𝒦", // LJcy;[Љ]. "\x03cy;\x02Љ", // LT;[<]. "\x01;\x01<", // Laplacetrf;[ℒ] Lacute;[Ĺ] Lambda;[Λ] Lang;[⟪] Larr;[↞]. "\x09placetrf;\x03ℒ\x05cute;\x02Ĺ\x05mbda;\x02Λ\x03ng;\x03⟪\x03rr;\x03↞", // Lcaron;[Ľ] Lcedil;[Ļ] Lcy;[Л]. "\x05aron;\x02Ľ\x05edil;\x02Ļ\x02y;\x02Л", // LeftArrowRightArrow;[⇆] LeftDoubleBracket;[⟦] LeftDownTeeVector;[⥡] LeftDownVectorBar;[⥙] LeftTriangleEqual;[⊴] LeftAngleBracket;[⟨] LeftUpDownVector;[⥑] LessEqualGreater;[⋚] LeftRightVector;[⥎] LeftTriangleBar;[⧏] LeftUpTeeVector;[⥠] LeftUpVectorBar;[⥘] LeftDownVector;[⇃] LeftRightArrow;[↔] Leftrightarrow;[⇔] LessSlantEqual;[⩽] LeftTeeVector;[⥚] LeftVectorBar;[⥒] LessFullEqual;[≦] LeftArrowBar;[⇤] LeftTeeArrow;[↤] LeftTriangle;[⊲] LeftUpVector;[↿] LeftCeiling;[⌈] LessGreater;[≶] LeftVector;[↼] LeftArrow;[←] LeftFloor;[⌊] Leftarrow;[⇐] LessTilde;[≲] LessLess;[⪡] LeftTee;[⊣]. "\x12ftArrowRightArrow;\x03⇆\x10ftDoubleBracket;\x03⟦\x10ftDownTeeVector;\x03⥡\x10ftDownVectorBar;\x03⥙\x10ftTriangleEqual;\x03⊴\x0fftAngleBracket;\x03⟨\x0fftUpDownVector;\x03⥑\x0fssEqualGreater;\x03⋚\x0eftRightVector;\x03⥎\x0eftTriangleBar;\x03⧏\x0eftUpTeeVector;\x03⥠\x0eftUpVectorBar;\x03⥘\x0dftDownVector;\x03⇃\x0dftRightArrow;\x03↔\x0dftrightarrow;\x03⇔\x0dssSlantEqual;\x03⩽\x0cftTeeVector;\x03⥚\x0cftVectorBar;\x03⥒\x0cssFullEqual;\x03≦\x0bftArrowBar;\x03⇤\x0bftTeeArrow;\x03↤\x0bftTriangle;\x03⊲\x0bftUpVector;\x03↿\x0aftCeiling;\x03⌈\x0assGreater;\x03≶\x09ftVector;\x03↼\x08ftArrow;\x03←\x08ftFloor;\x03⌊\x08ftarrow;\x03⇐\x08ssTilde;\x03≲\x07ssLess;\x03⪡\x06ftTee;\x03⊣", // Lfr;[𝔏]. "\x02r;\x04𝔏", // Lleftarrow;[⇚] Ll;[⋘]. "\x09eftarrow;\x03⇚\x01;\x03⋘", // Lmidot;[Ŀ]. 
"\x05idot;\x02Ŀ", // LongLeftRightArrow;[⟷] Longleftrightarrow;[⟺] LowerRightArrow;[↘] LongRightArrow;[⟶] Longrightarrow;[⟹] LowerLeftArrow;[↙] LongLeftArrow;[⟵] Longleftarrow;[⟸] Lopf;[𝕃]. "\x11ngLeftRightArrow;\x03⟷\x11ngleftrightarrow;\x03⟺\x0ewerRightArrow;\x03↘\x0dngRightArrow;\x03⟶\x0dngrightarrow;\x03⟹\x0dwerLeftArrow;\x03↙\x0cngLeftArrow;\x03⟵\x0cngleftarrow;\x03⟸\x03pf;\x04𝕃", // Lstrok;[Ł] Lscr;[ℒ] Lsh;[↰]. "\x05trok;\x02Ł\x03cr;\x03ℒ\x02h;\x03↰", // Lt;[≪]. "\x01;\x03≪", // Map;[⤅]. "\x02p;\x03⤅", // Mcy;[М]. "\x02y;\x02М", // MediumSpace;[ ] Mellintrf;[ℳ]. "\x0adiumSpace;\x03 \x08llintrf;\x03ℳ", // Mfr;[𝔐]. "\x02r;\x04𝔐", // MinusPlus;[∓]. "\x08nusPlus;\x03∓", // Mopf;[𝕄]. "\x03pf;\x04𝕄", // Mscr;[ℳ]. "\x03cr;\x03ℳ", // Mu;[Μ]. "\x01;\x02Μ", // NJcy;[Њ]. "\x03cy;\x02Њ", // Nacute;[Ń]. "\x05cute;\x02Ń", // Ncaron;[Ň] Ncedil;[Ņ] Ncy;[Н]. "\x05aron;\x02Ň\x05edil;\x02Ņ\x02y;\x02Н", // NegativeVeryThinSpace;[​] NestedGreaterGreater;[≫] NegativeMediumSpace;[​] NegativeThickSpace;[​] NegativeThinSpace;[​] NestedLessLess;[≪] NewLine;[\xa]. "\x14gativeVeryThinSpace;\x03​\x13stedGreaterGreater;\x03≫\x12gativeMediumSpace;\x03​\x11gativeThickSpace;\x03​\x10gativeThinSpace;\x03​\x0dstedLessLess;\x03≪\x06wLine;\x01\xa", // Nfr;[𝔑]. 
"\x02r;\x04𝔑", // NotNestedGreaterGreater;[⪢̸] NotSquareSupersetEqual;[⋣] NotPrecedesSlantEqual;[⋠] NotRightTriangleEqual;[⋭] NotSucceedsSlantEqual;[⋡] NotDoubleVerticalBar;[∦] NotGreaterSlantEqual;[⩾̸] NotLeftTriangleEqual;[⋬] NotSquareSubsetEqual;[⋢] NotGreaterFullEqual;[≧̸] NotRightTriangleBar;[⧐̸] NotLeftTriangleBar;[⧏̸] NotGreaterGreater;[≫̸] NotLessSlantEqual;[⩽̸] NotNestedLessLess;[⪡̸] NotReverseElement;[∌] NotSquareSuperset;[⊐̸] NotTildeFullEqual;[≇] NonBreakingSpace;[ ] NotPrecedesEqual;[⪯̸] NotRightTriangle;[⋫] NotSucceedsEqual;[⪰̸] NotSucceedsTilde;[≿̸] NotSupersetEqual;[⊉] NotGreaterEqual;[≱] NotGreaterTilde;[≵] NotHumpDownHump;[≎̸] NotLeftTriangle;[⋪] NotSquareSubset;[⊏̸] NotGreaterLess;[≹] NotLessGreater;[≸] NotSubsetEqual;[⊈] NotVerticalBar;[∤] NotEqualTilde;[≂̸] NotTildeEqual;[≄] NotTildeTilde;[≉] NotCongruent;[≢] NotHumpEqual;[≏̸] NotLessEqual;[≰] NotLessTilde;[≴] NotLessLess;[≪̸] NotPrecedes;[⊀] NotSucceeds;[⊁] NotSuperset;[⊃⃒] NotElement;[∉] NotGreater;[≯] NotCupCap;[≭] NotExists;[∄] NotSubset;[⊂⃒] NotEqual;[≠] NotTilde;[≁] NoBreak;[⁠] NotLess;[≮] Nopf;[ℕ] Not;[⫬]. 
"\x16tNestedGreaterGreater;\x05⪢̸\x15tSquareSupersetEqual;\x03⋣\x14tPrecedesSlantEqual;\x03⋠\x14tRightTriangleEqual;\x03⋭\x14tSucceedsSlantEqual;\x03⋡\x13tDoubleVerticalBar;\x03∦\x13tGreaterSlantEqual;\x05⩾̸\x13tLeftTriangleEqual;\x03⋬\x13tSquareSubsetEqual;\x03⋢\x12tGreaterFullEqual;\x05≧̸\x12tRightTriangleBar;\x05⧐̸\x11tLeftTriangleBar;\x05⧏̸\x10tGreaterGreater;\x05≫̸\x10tLessSlantEqual;\x05⩽̸\x10tNestedLessLess;\x05⪡̸\x10tReverseElement;\x03∌\x10tSquareSuperset;\x05⊐̸\x10tTildeFullEqual;\x03≇\x0fnBreakingSpace;\x02 \x0ftPrecedesEqual;\x05⪯̸\x0ftRightTriangle;\x03⋫\x0ftSucceedsEqual;\x05⪰̸\x0ftSucceedsTilde;\x05≿̸\x0ftSupersetEqual;\x03⊉\x0etGreaterEqual;\x03≱\x0etGreaterTilde;\x03≵\x0etHumpDownHump;\x05≎̸\x0etLeftTriangle;\x03⋪\x0etSquareSubset;\x05⊏̸\x0dtGreaterLess;\x03≹\x0dtLessGreater;\x03≸\x0dtSubsetEqual;\x03⊈\x0dtVerticalBar;\x03∤\x0ctEqualTilde;\x05≂̸\x0ctTildeEqual;\x03≄\x0ctTildeTilde;\x03≉\x0btCongruent;\x03≢\x0btHumpEqual;\x05≏̸\x0btLessEqual;\x03≰\x0btLessTilde;\x03≴\x0atLessLess;\x05≪̸\x0atPrecedes;\x03⊀\x0atSucceeds;\x03⊁\x0atSuperset;\x06⊃⃒\x09tElement;\x03∉\x09tGreater;\x03≯\x08tCupCap;\x03≭\x08tExists;\x03∄\x08tSubset;\x06⊂⃒\x07tEqual;\x03≠\x07tTilde;\x03≁\x06Break;\x03⁠\x06tLess;\x03≮\x03pf;\x03ℕ\x02t;\x03⫬", // Nscr;[𝒩]. "\x03cr;\x04𝒩", // Ntilde;[Ñ] Ntilde[Ñ]. "\x05ilde;\x02Ñ\x04ilde\x02Ñ", // Nu;[Ν]. "\x01;\x02Ν", // OElig;[Œ]. "\x04lig;\x02Œ", // Oacute;[Ó] Oacute[Ó]. "\x05cute;\x02Ó\x04cute\x02Ó", // Ocirc;[Ô] Ocirc[Ô] Ocy;[О]. "\x04irc;\x02Ô\x03irc\x02Ô\x02y;\x02О", // Odblac;[Ő]. "\x05blac;\x02Ő", // Ofr;[𝔒]. "\x02r;\x04𝔒", // Ograve;[Ò] Ograve[Ò]. "\x05rave;\x02Ò\x04rave\x02Ò", // Omicron;[Ο] Omacr;[Ō] Omega;[Ω]. "\x06icron;\x02Ο\x04acr;\x02Ō\x04ega;\x02Ω", // Oopf;[𝕆]. "\x03pf;\x04𝕆", // OpenCurlyDoubleQuote;[“] OpenCurlyQuote;[‘]. "\x13enCurlyDoubleQuote;\x03“\x0denCurlyQuote;\x03‘", // Or;[⩔]. "\x01;\x03⩔", // Oslash;[Ø] Oslash[Ø] Oscr;[𝒪]. "\x05lash;\x02Ø\x04lash\x02Ø\x03cr;\x04𝒪", // Otilde;[Õ] Otimes;[⨷] Otilde[Õ]. 
"\x05ilde;\x02Õ\x05imes;\x03⨷\x04ilde\x02Õ", // Ouml;[Ö] Ouml[Ö]. "\x03ml;\x02Ö\x02ml\x02Ö", // OverParenthesis;[⏜] OverBracket;[⎴] OverBrace;[⏞] OverBar;[‾]. "\x0eerParenthesis;\x03⏜\x0aerBracket;\x03⎴\x08erBrace;\x03⏞\x06erBar;\x03‾", // PartialD;[∂]. "\x07rtialD;\x03∂", // Pcy;[П]. "\x02y;\x02П", // Pfr;[𝔓]. "\x02r;\x04𝔓", // Phi;[Φ]. "\x02i;\x02Φ", // Pi;[Π]. "\x01;\x02Π", // PlusMinus;[±]. "\x08usMinus;\x02±", // Poincareplane;[ℌ] Popf;[ℙ]. "\x0cincareplane;\x03ℌ\x03pf;\x03ℙ", // PrecedesSlantEqual;[≼] PrecedesEqual;[⪯] PrecedesTilde;[≾] Proportional;[∝] Proportion;[∷] Precedes;[≺] Product;[∏] Prime;[″] Pr;[⪻]. "\x11ecedesSlantEqual;\x03≼\x0cecedesEqual;\x03⪯\x0cecedesTilde;\x03≾\x0boportional;\x03∝\x09oportion;\x03∷\x07ecedes;\x03≺\x06oduct;\x03∏\x04ime;\x03″\x01;\x03⪻", // Pscr;[𝒫] Psi;[Ψ]. "\x03cr;\x04𝒫\x02i;\x02Ψ", // QUOT;[\"] QUOT[\"]. "\x03OT;\x01\"\x02OT\x01\"", // Qfr;[𝔔]. "\x02r;\x04𝔔", // Qopf;[ℚ]. "\x03pf;\x03ℚ", // Qscr;[𝒬]. "\x03cr;\x04𝒬", // RBarr;[⤐]. "\x04arr;\x03⤐", // REG;[®] REG[®]. "\x02G;\x02®\x01G\x02®", // Racute;[Ŕ] Rarrtl;[⤖] Rang;[⟫] Rarr;[↠]. "\x05cute;\x02Ŕ\x05rrtl;\x03⤖\x03ng;\x03⟫\x03rr;\x03↠", // Rcaron;[Ř] Rcedil;[Ŗ] Rcy;[Р]. "\x05aron;\x02Ř\x05edil;\x02Ŗ\x02y;\x02Р", // ReverseUpEquilibrium;[⥯] ReverseEquilibrium;[⇋] ReverseElement;[∋] Re;[ℜ]. "\x13verseUpEquilibrium;\x03⥯\x11verseEquilibrium;\x03⇋\x0dverseElement;\x03∋\x01;\x03ℜ", // Rfr;[ℜ]. "\x02r;\x03ℜ", // Rho;[Ρ]. "\x02o;\x02Ρ", // RightArrowLeftArrow;[⇄] RightDoubleBracket;[⟧] RightDownTeeVector;[⥝] RightDownVectorBar;[⥕] RightTriangleEqual;[⊵] RightAngleBracket;[⟩] RightUpDownVector;[⥏] RightTriangleBar;[⧐] RightUpTeeVector;[⥜] RightUpVectorBar;[⥔] RightDownVector;[⇂] RightTeeVector;[⥛] RightVectorBar;[⥓] RightArrowBar;[⇥] RightTeeArrow;[↦] RightTriangle;[⊳] RightUpVector;[↾] RightCeiling;[⌉] RightVector;[⇀] RightArrow;[→] RightFloor;[⌋] Rightarrow;[⇒] RightTee;[⊢]. 
"\x12ghtArrowLeftArrow;\x03⇄\x11ghtDoubleBracket;\x03⟧\x11ghtDownTeeVector;\x03⥝\x11ghtDownVectorBar;\x03⥕\x11ghtTriangleEqual;\x03⊵\x10ghtAngleBracket;\x03⟩\x10ghtUpDownVector;\x03⥏\x0fghtTriangleBar;\x03⧐\x0fghtUpTeeVector;\x03⥜\x0fghtUpVectorBar;\x03⥔\x0eghtDownVector;\x03⇂\x0dghtTeeVector;\x03⥛\x0dghtVectorBar;\x03⥓\x0cghtArrowBar;\x03⇥\x0cghtTeeArrow;\x03↦\x0cghtTriangle;\x03⊳\x0cghtUpVector;\x03↾\x0bghtCeiling;\x03⌉\x0aghtVector;\x03⇀\x09ghtArrow;\x03→\x09ghtFloor;\x03⌋\x09ghtarrow;\x03⇒\x07ghtTee;\x03⊢", // RoundImplies;[⥰] Ropf;[ℝ]. "\x0bundImplies;\x03⥰\x03pf;\x03ℝ", // Rrightarrow;[⇛]. "\x0aightarrow;\x03⇛", // Rscr;[ℛ] Rsh;[↱]. "\x03cr;\x03ℛ\x02h;\x03↱", // RuleDelayed;[⧴]. "\x0aleDelayed;\x03⧴", // SHCHcy;[Щ] SHcy;[Ш]. "\x05CHcy;\x02Щ\x03cy;\x02Ш", // SOFTcy;[Ь]. "\x05FTcy;\x02Ь", // Sacute;[Ś]. "\x05cute;\x02Ś", // Scaron;[Š] Scedil;[Ş] Scirc;[Ŝ] Scy;[С] Sc;[⪼]. "\x05aron;\x02Š\x05edil;\x02Ş\x04irc;\x02Ŝ\x02y;\x02С\x01;\x03⪼", // Sfr;[𝔖]. "\x02r;\x04𝔖", // ShortRightArrow;[→] ShortDownArrow;[↓] ShortLeftArrow;[←] ShortUpArrow;[↑]. "\x0eortRightArrow;\x03→\x0dortDownArrow;\x03↓\x0dortLeftArrow;\x03←\x0bortUpArrow;\x03↑", // Sigma;[Σ]. "\x04gma;\x02Σ", // SmallCircle;[∘]. "\x0aallCircle;\x03∘", // Sopf;[𝕊]. "\x03pf;\x04𝕊", // SquareSupersetEqual;[⊒] SquareIntersection;[⊓] SquareSubsetEqual;[⊑] SquareSuperset;[⊐] SquareSubset;[⊏] SquareUnion;[⊔] Square;[□] Sqrt;[√]. "\x12uareSupersetEqual;\x03⊒\x11uareIntersection;\x03⊓\x10uareSubsetEqual;\x03⊑\x0duareSuperset;\x03⊐\x0buareSubset;\x03⊏\x0auareUnion;\x03⊔\x05uare;\x03□\x03rt;\x03√", // Sscr;[𝒮]. "\x03cr;\x04𝒮", // Star;[⋆]. "\x03ar;\x03⋆", // SucceedsSlantEqual;[≽] SucceedsEqual;[⪰] SucceedsTilde;[≿] SupersetEqual;[⊇] SubsetEqual;[⊆] Succeeds;[≻] SuchThat;[∋] Superset;[⊃] Subset;[⋐] Supset;[⋑] Sub;[⋐] Sum;[∑] Sup;[⋑]. 
"\x11cceedsSlantEqual;\x03≽\x0ccceedsEqual;\x03⪰\x0ccceedsTilde;\x03≿\x0cpersetEqual;\x03⊇\x0absetEqual;\x03⊆\x07cceeds;\x03≻\x07chThat;\x03∋\x07perset;\x03⊃\x05bset;\x03⋐\x05pset;\x03⋑\x02b;\x03⋐\x02m;\x03∑\x02p;\x03⋑", // THORN;[Þ] THORN[Þ]. "\x04ORN;\x02Þ\x03ORN\x02Þ", // TRADE;[™]. "\x04ADE;\x03™", // TSHcy;[Ћ] TScy;[Ц]. "\x04Hcy;\x02Ћ\x03cy;\x02Ц", // Tab;[\x9] Tau;[Τ]. "\x02b;\x01\x9\x02u;\x02Τ", // Tcaron;[Ť] Tcedil;[Ţ] Tcy;[Т]. "\x05aron;\x02Ť\x05edil;\x02Ţ\x02y;\x02Т", // Tfr;[𝔗]. "\x02r;\x04𝔗", // ThickSpace;[  ] Therefore;[∴] ThinSpace;[ ] Theta;[Θ]. "\x09ickSpace;\x06  \x08erefore;\x03∴\x08inSpace;\x03 \x04eta;\x02Θ", // TildeFullEqual;[≅] TildeEqual;[≃] TildeTilde;[≈] Tilde;[∼]. "\x0dldeFullEqual;\x03≅\x09ldeEqual;\x03≃\x09ldeTilde;\x03≈\x04lde;\x03∼", // Topf;[𝕋]. "\x03pf;\x04𝕋", // TripleDot;[⃛]. "\x08ipleDot;\x03⃛", // Tstrok;[Ŧ] Tscr;[𝒯]. "\x05trok;\x02Ŧ\x03cr;\x04𝒯", // Uarrocir;[⥉] Uacute;[Ú] Uacute[Ú] Uarr;[↟]. "\x07rrocir;\x03⥉\x05cute;\x02Ú\x04cute\x02Ú\x03rr;\x03↟", // Ubreve;[Ŭ] Ubrcy;[Ў]. "\x05reve;\x02Ŭ\x04rcy;\x02Ў", // Ucirc;[Û] Ucirc[Û] Ucy;[У]. "\x04irc;\x02Û\x03irc\x02Û\x02y;\x02У", // Udblac;[Ű]. "\x05blac;\x02Ű", // Ufr;[𝔘]. "\x02r;\x04𝔘", // Ugrave;[Ù] Ugrave[Ù]. "\x05rave;\x02Ù\x04rave\x02Ù", // Umacr;[Ū]. "\x04acr;\x02Ū", // UnderParenthesis;[⏝] UnderBracket;[⎵] UnderBrace;[⏟] UnionPlus;[⊎] UnderBar;[_] Union;[⋃]. "\x0fderParenthesis;\x03⏝\x0bderBracket;\x03⎵\x09derBrace;\x03⏟\x08ionPlus;\x03⊎\x07derBar;\x01_\x04ion;\x03⋃", // Uogon;[Ų] Uopf;[𝕌]. "\x04gon;\x02Ų\x03pf;\x04𝕌", // UpArrowDownArrow;[⇅] UpperRightArrow;[↗] UpperLeftArrow;[↖] UpEquilibrium;[⥮] UpDownArrow;[↕] Updownarrow;[⇕] UpArrowBar;[⤒] UpTeeArrow;[↥] UpArrow;[↑] Uparrow;[⇑] Upsilon;[Υ] UpTee;[⊥] Upsi;[ϒ]. "\x0fArrowDownArrow;\x03⇅\x0eperRightArrow;\x03↗\x0dperLeftArrow;\x03↖\x0cEquilibrium;\x03⥮\x0aDownArrow;\x03↕\x0adownarrow;\x03⇕\x09ArrowBar;\x03⤒\x09TeeArrow;\x03↥\x06Arrow;\x03↑\x06arrow;\x03⇑\x06silon;\x02Υ\x04Tee;\x03⊥\x03si;\x02ϒ", // Uring;[Ů]. 
"\x04ing;\x02Ů", // Uscr;[𝒰]. "\x03cr;\x04𝒰", // Utilde;[Ũ]. "\x05ilde;\x02Ũ", // Uuml;[Ü] Uuml[Ü]. "\x03ml;\x02Ü\x02ml\x02Ü", // VDash;[⊫]. "\x04ash;\x03⊫", // Vbar;[⫫]. "\x03ar;\x03⫫", // Vcy;[В]. "\x02y;\x02В", // Vdashl;[⫦] Vdash;[⊩]. "\x05ashl;\x03⫦\x04ash;\x03⊩", // VerticalSeparator;[❘] VerticalTilde;[≀] VeryThinSpace;[ ] VerticalLine;[|] VerticalBar;[∣] Verbar;[‖] Vert;[‖] Vee;[⋁]. "\x10rticalSeparator;\x03❘\x0crticalTilde;\x03≀\x0cryThinSpace;\x03 \x0brticalLine;\x01|\x0articalBar;\x03∣\x05rbar;\x03‖\x03rt;\x03‖\x02e;\x03⋁", // Vfr;[𝔙]. "\x02r;\x04𝔙", // Vopf;[𝕍]. "\x03pf;\x04𝕍", // Vscr;[𝒱]. "\x03cr;\x04𝒱", // Vvdash;[⊪]. "\x05dash;\x03⊪", // Wcirc;[Ŵ]. "\x04irc;\x02Ŵ", // Wedge;[⋀]. "\x04dge;\x03⋀", // Wfr;[𝔚]. "\x02r;\x04𝔚", // Wopf;[𝕎]. "\x03pf;\x04𝕎", // Wscr;[𝒲]. "\x03cr;\x04𝒲", // Xfr;[𝔛]. "\x02r;\x04𝔛", // Xi;[Ξ]. "\x01;\x02Ξ", // Xopf;[𝕏]. "\x03pf;\x04𝕏", // Xscr;[𝒳]. "\x03cr;\x04𝒳", // YAcy;[Я]. "\x03cy;\x02Я", // YIcy;[Ї]. "\x03cy;\x02Ї", // YUcy;[Ю]. "\x03cy;\x02Ю", // Yacute;[Ý] Yacute[Ý]. "\x05cute;\x02Ý\x04cute\x02Ý", // Ycirc;[Ŷ] Ycy;[Ы]. "\x04irc;\x02Ŷ\x02y;\x02Ы", // Yfr;[𝔜]. "\x02r;\x04𝔜", // Yopf;[𝕐]. "\x03pf;\x04𝕐", // Yscr;[𝒴]. "\x03cr;\x04𝒴", // Yuml;[Ÿ]. "\x03ml;\x02Ÿ", // ZHcy;[Ж]. "\x03cy;\x02Ж", // Zacute;[Ź]. "\x05cute;\x02Ź", // Zcaron;[Ž] Zcy;[З]. "\x05aron;\x02Ž\x02y;\x02З", // Zdot;[Ż]. "\x03ot;\x02Ż", // ZeroWidthSpace;[​] Zeta;[Ζ]. "\x0droWidthSpace;\x03​\x03ta;\x02Ζ", // Zfr;[ℨ]. "\x02r;\x03ℨ", // Zopf;[ℤ]. "\x03pf;\x03ℤ", // Zscr;[𝒵]. "\x03cr;\x04𝒵", // aacute;[á] aacute[á]. "\x05cute;\x02á\x04cute\x02á", // abreve;[ă]. "\x05reve;\x02ă", // acirc;[â] acute;[´] acirc[â] acute[´] acE;[∾̳] acd;[∿] acy;[а] ac;[∾]. "\x04irc;\x02â\x04ute;\x02´\x03irc\x02â\x03ute\x02´\x02E;\x05∾̳\x02d;\x03∿\x02y;\x02а\x01;\x03∾", // aelig;[æ] aelig[æ]. "\x04lig;\x02æ\x03lig\x02æ", // afr;[𝔞] af;[⁡]. "\x02r;\x04𝔞\x01;\x03⁡", // agrave;[à] agrave[à]. "\x05rave;\x02à\x04rave\x02à", // alefsym;[ℵ] aleph;[ℵ] alpha;[α]. 
"\x06efsym;\x03ℵ\x04eph;\x03ℵ\x04pha;\x02α", // amacr;[ā] amalg;[⨿] amp;[&] amp[&]. "\x04acr;\x02ā\x04alg;\x03⨿\x02p;\x01&\x01p\x01&", // andslope;[⩘] angmsdaa;[⦨] angmsdab;[⦩] angmsdac;[⦪] angmsdad;[⦫] angmsdae;[⦬] angmsdaf;[⦭] angmsdag;[⦮] angmsdah;[⦯] angrtvbd;[⦝] angrtvb;[⊾] angzarr;[⍼] andand;[⩕] angmsd;[∡] angsph;[∢] angle;[∠] angrt;[∟] angst;[Å] andd;[⩜] andv;[⩚] ange;[⦤] and;[∧] ang;[∠]. "\x07dslope;\x03⩘\x07gmsdaa;\x03⦨\x07gmsdab;\x03⦩\x07gmsdac;\x03⦪\x07gmsdad;\x03⦫\x07gmsdae;\x03⦬\x07gmsdaf;\x03⦭\x07gmsdag;\x03⦮\x07gmsdah;\x03⦯\x07grtvbd;\x03⦝\x06grtvb;\x03⊾\x06gzarr;\x03⍼\x05dand;\x03⩕\x05gmsd;\x03∡\x05gsph;\x03∢\x04gle;\x03∠\x04grt;\x03∟\x04gst;\x02Å\x03dd;\x03⩜\x03dv;\x03⩚\x03ge;\x03⦤\x02d;\x03∧\x02g;\x03∠", // aogon;[ą] aopf;[𝕒]. "\x04gon;\x02ą\x03pf;\x04𝕒", // approxeq;[≊] apacir;[⩯] approx;[≈] apid;[≋] apos;['] apE;[⩰] ape;[≊] ap;[≈]. "\x07proxeq;\x03≊\x05acir;\x03⩯\x05prox;\x03≈\x03id;\x03≋\x03os;\x01'\x02E;\x03⩰\x02e;\x03≊\x01;\x03≈", // aring;[å] aring[å]. "\x04ing;\x02å\x03ing\x02å", // asympeq;[≍] asymp;[≈] ascr;[𝒶] ast;[*]. "\x06ympeq;\x03≍\x04ymp;\x03≈\x03cr;\x04𝒶\x02t;\x01*", // atilde;[ã] atilde[ã]. "\x05ilde;\x02ã\x04ilde\x02ã", // auml;[ä] auml[ä]. "\x03ml;\x02ä\x02ml\x02ä", // awconint;[∳] awint;[⨑]. "\x07conint;\x03∳\x04int;\x03⨑", // bNot;[⫭]. "\x03ot;\x03⫭", // backepsilon;[϶] backprime;[‵] backsimeq;[⋍] backcong;[≌] barwedge;[⌅] backsim;[∽] barvee;[⊽] barwed;[⌅]. "\x0ackepsilon;\x02϶\x08ckprime;\x03‵\x08cksimeq;\x03⋍\x07ckcong;\x03≌\x07rwedge;\x03⌅\x06cksim;\x03∽\x05rvee;\x03⊽\x05rwed;\x03⌅", // bbrktbrk;[⎶] bbrk;[⎵]. "\x07rktbrk;\x03⎶\x03rk;\x03⎵", // bcong;[≌] bcy;[б]. "\x04ong;\x03≌\x02y;\x02б", // bdquo;[„]. "\x04quo;\x03„", // because;[∵] bemptyv;[⦰] between;[≬] becaus;[∵] bernou;[ℬ] bepsi;[϶] beta;[β] beth;[ℶ]. "\x06cause;\x03∵\x06mptyv;\x03⦰\x06tween;\x03≬\x05caus;\x03∵\x05rnou;\x03ℬ\x04psi;\x02϶\x03ta;\x02β\x03th;\x03ℶ", // bfr;[𝔟]. 
"\x02r;\x04𝔟", // bigtriangledown;[▽] bigtriangleup;[△] bigotimes;[⨂] bigoplus;[⨁] bigsqcup;[⨆] biguplus;[⨄] bigwedge;[⋀] bigcirc;[◯] bigodot;[⨀] bigstar;[★] bigcap;[⋂] bigcup;[⋃] bigvee;[⋁]. "\x0egtriangledown;\x03▽\x0cgtriangleup;\x03△\x08gotimes;\x03⨂\x07goplus;\x03⨁\x07gsqcup;\x03⨆\x07guplus;\x03⨄\x07gwedge;\x03⋀\x06gcirc;\x03◯\x06godot;\x03⨀\x06gstar;\x03★\x05gcap;\x03⋂\x05gcup;\x03⋃\x05gvee;\x03⋁", // bkarow;[⤍]. "\x05arow;\x03⤍", // blacktriangleright;[▸] blacktriangledown;[▾] blacktriangleleft;[◂] blacktriangle;[▴] blacklozenge;[⧫] blacksquare;[▪] blank;[␣] blk12;[▒] blk14;[░] blk34;[▓] block;[█]. "\x11acktriangleright;\x03▸\x10acktriangledown;\x03▾\x10acktriangleleft;\x03◂\x0cacktriangle;\x03▴\x0backlozenge;\x03⧫\x0aacksquare;\x03▪\x04ank;\x03␣\x04k12;\x03▒\x04k14;\x03░\x04k34;\x03▓\x04ock;\x03█", // bnequiv;[≡⃥] bnot;[⌐] bne;[=⃥]. "\x06equiv;\x06≡⃥\x03ot;\x03⌐\x02e;\x04=⃥", // boxminus;[⊟] boxtimes;[⊠] boxplus;[⊞] bottom;[⊥] bowtie;[⋈] boxbox;[⧉] boxDL;[╗] boxDR;[╔] boxDl;[╖] boxDr;[╓] boxHD;[╦] boxHU;[╩] boxHd;[╤] boxHu;[╧] boxUL;[╝] boxUR;[╚] boxUl;[╜] boxUr;[╙] boxVH;[╬] boxVL;[╣] boxVR;[╠] boxVh;[╫] boxVl;[╢] boxVr;[╟] boxdL;[╕] boxdR;[╒] boxdl;[┐] boxdr;[┌] boxhD;[╥] boxhU;[╨] boxhd;[┬] boxhu;[┴] boxuL;[╛] boxuR;[╘] boxul;[┘] boxur;[└] boxvH;[╪] boxvL;[╡] boxvR;[╞] boxvh;[┼] boxvl;[┤] boxvr;[├] bopf;[𝕓] boxH;[═] boxV;[║] boxh;[─] boxv;[│] bot;[⊥]. 
"\x07xminus;\x03⊟\x07xtimes;\x03⊠\x06xplus;\x03⊞\x05ttom;\x03⊥\x05wtie;\x03⋈\x05xbox;\x03⧉\x04xDL;\x03╗\x04xDR;\x03╔\x04xDl;\x03╖\x04xDr;\x03╓\x04xHD;\x03╦\x04xHU;\x03╩\x04xHd;\x03╤\x04xHu;\x03╧\x04xUL;\x03╝\x04xUR;\x03╚\x04xUl;\x03╜\x04xUr;\x03╙\x04xVH;\x03╬\x04xVL;\x03╣\x04xVR;\x03╠\x04xVh;\x03╫\x04xVl;\x03╢\x04xVr;\x03╟\x04xdL;\x03╕\x04xdR;\x03╒\x04xdl;\x03┐\x04xdr;\x03┌\x04xhD;\x03╥\x04xhU;\x03╨\x04xhd;\x03┬\x04xhu;\x03┴\x04xuL;\x03╛\x04xuR;\x03╘\x04xul;\x03┘\x04xur;\x03└\x04xvH;\x03╪\x04xvL;\x03╡\x04xvR;\x03╞\x04xvh;\x03┼\x04xvl;\x03┤\x04xvr;\x03├\x03pf;\x04𝕓\x03xH;\x03═\x03xV;\x03║\x03xh;\x03─\x03xv;\x03│\x02t;\x03⊥", // bprime;[‵]. "\x05rime;\x03‵", // brvbar;[¦] breve;[˘] brvbar[¦]. "\x05vbar;\x02¦\x04eve;\x02˘\x04vbar\x02¦", // bsolhsub;[⟈] bsemi;[⁏] bsime;[⋍] bsolb;[⧅] bscr;[𝒷] bsim;[∽] bsol;[\\]. "\x07olhsub;\x03⟈\x04emi;\x03⁏\x04ime;\x03⋍\x04olb;\x03⧅\x03cr;\x04𝒷\x03im;\x03∽\x03ol;\x01\\", // bullet;[•] bumpeq;[≏] bumpE;[⪮] bumpe;[≏] bull;[•] bump;[≎]. "\x05llet;\x03•\x05mpeq;\x03≏\x04mpE;\x03⪮\x04mpe;\x03≏\x03ll;\x03•\x03mp;\x03≎", // capbrcup;[⩉] cacute;[ć] capand;[⩄] capcap;[⩋] capcup;[⩇] capdot;[⩀] caret;[⁁] caron;[ˇ] caps;[∩︀] cap;[∩]. "\x07pbrcup;\x03⩉\x05cute;\x02ć\x05pand;\x03⩄\x05pcap;\x03⩋\x05pcup;\x03⩇\x05pdot;\x03⩀\x04ret;\x03⁁\x04ron;\x02ˇ\x03ps;\x06∩︀\x02p;\x03∩", // ccupssm;[⩐] ccaron;[č] ccedil;[ç] ccaps;[⩍] ccedil[ç] ccirc;[ĉ] ccups;[⩌]. "\x06upssm;\x03⩐\x05aron;\x02č\x05edil;\x02ç\x04aps;\x03⩍\x04edil\x02ç\x04irc;\x02ĉ\x04ups;\x03⩌", // cdot;[ċ]. "\x03ot;\x02ċ", // centerdot;[·] cemptyv;[⦲] cedil;[¸] cedil[¸] cent;[¢] cent[¢]. "\x08nterdot;\x02·\x06mptyv;\x03⦲\x04dil;\x02¸\x03dil\x02¸\x03nt;\x02¢\x02nt\x02¢", // cfr;[𝔠]. "\x02r;\x04𝔠", // checkmark;[✓] check;[✓] chcy;[ч] chi;[χ]. "\x08eckmark;\x03✓\x04eck;\x03✓\x03cy;\x02ч\x02i;\x02χ", // circlearrowright;[↻] circlearrowleft;[↺] circledcirc;[⊚] circleddash;[⊝] circledast;[⊛] circledR;[®] circledS;[Ⓢ] cirfnint;[⨐] cirscir;[⧂] circeq;[≗] cirmid;[⫯] cirE;[⧃] circ;[ˆ] cire;[≗] cir;[○]. 
"\x0frclearrowright;\x03↻\x0erclearrowleft;\x03↺\x0arcledcirc;\x03⊚\x0arcleddash;\x03⊝\x09rcledast;\x03⊛\x07rcledR;\x02®\x07rcledS;\x03Ⓢ\x07rfnint;\x03⨐\x06rscir;\x03⧂\x05rceq;\x03≗\x05rmid;\x03⫯\x03rE;\x03⧃\x03rc;\x02ˆ\x03re;\x03≗\x02r;\x03○", // clubsuit;[♣] clubs;[♣]. "\x07ubsuit;\x03♣\x04ubs;\x03♣", // complement;[∁] complexes;[ℂ] coloneq;[≔] congdot;[⩭] colone;[≔] commat;[@] compfn;[∘] conint;[∮] coprod;[∐] copysr;[℗] colon;[:] comma;[,] comp;[∁] cong;[≅] copf;[𝕔] copy;[©] copy[©]. "\x09mplement;\x03∁\x08mplexes;\x03ℂ\x06loneq;\x03≔\x06ngdot;\x03⩭\x05lone;\x03≔\x05mmat;\x01@\x05mpfn;\x03∘\x05nint;\x03∮\x05prod;\x03∐\x05pysr;\x03℗\x04lon;\x01:\x04mma;\x01,\x03mp;\x03∁\x03ng;\x03≅\x03pf;\x04𝕔\x03py;\x02©\x02py\x02©", // crarr;[↵] cross;[✗]. "\x04arr;\x03↵\x04oss;\x03✗", // csube;[⫑] csupe;[⫒] cscr;[𝒸] csub;[⫏] csup;[⫐]. "\x04ube;\x03⫑\x04upe;\x03⫒\x03cr;\x04𝒸\x03ub;\x03⫏\x03up;\x03⫐", // ctdot;[⋯]. "\x04dot;\x03⋯", // curvearrowright;[↷] curvearrowleft;[↶] curlyeqprec;[⋞] curlyeqsucc;[⋟] curlywedge;[⋏] cupbrcap;[⩈] curlyvee;[⋎] cudarrl;[⤸] cudarrr;[⤵] cularrp;[⤽] curarrm;[⤼] cularr;[↶] cupcap;[⩆] cupcup;[⩊] cupdot;[⊍] curarr;[↷] curren;[¤] cuepr;[⋞] cuesc;[⋟] cupor;[⩅] curren[¤] cuvee;[⋎] cuwed;[⋏] cups;[∪︀] cup;[∪]. "\x0ervearrowright;\x03↷\x0drvearrowleft;\x03↶\x0arlyeqprec;\x03⋞\x0arlyeqsucc;\x03⋟\x09rlywedge;\x03⋏\x07pbrcap;\x03⩈\x07rlyvee;\x03⋎\x06darrl;\x03⤸\x06darrr;\x03⤵\x06larrp;\x03⤽\x06rarrm;\x03⤼\x05larr;\x03↶\x05pcap;\x03⩆\x05pcup;\x03⩊\x05pdot;\x03⊍\x05rarr;\x03↷\x05rren;\x02¤\x04epr;\x03⋞\x04esc;\x03⋟\x04por;\x03⩅\x04rren\x02¤\x04vee;\x03⋎\x04wed;\x03⋏\x03ps;\x06∪︀\x02p;\x03∪", // cwconint;[∲] cwint;[∱]. "\x07conint;\x03∲\x04int;\x03∱", // cylcty;[⌭]. "\x05lcty;\x03⌭", // dArr;[⇓]. "\x03rr;\x03⇓", // dHar;[⥥]. "\x03ar;\x03⥥", // dagger;[†] daleth;[ℸ] dashv;[⊣] darr;[↓] dash;[‐]. "\x05gger;\x03†\x05leth;\x03ℸ\x04shv;\x03⊣\x03rr;\x03↓\x03sh;\x03‐", // dbkarow;[⤏] dblac;[˝]. "\x06karow;\x03⤏\x04lac;\x02˝", // dcaron;[ď] dcy;[д]. 
"\x05aron;\x02ď\x02y;\x02д", // ddagger;[‡] ddotseq;[⩷] ddarr;[⇊] dd;[ⅆ]. "\x06agger;\x03‡\x06otseq;\x03⩷\x04arr;\x03⇊\x01;\x03ⅆ", // demptyv;[⦱] delta;[δ] deg;[°] deg[°]. "\x06mptyv;\x03⦱\x04lta;\x02δ\x02g;\x02°\x01g\x02°", // dfisht;[⥿] dfr;[𝔡]. "\x05isht;\x03⥿\x02r;\x04𝔡", // dharl;[⇃] dharr;[⇂]. "\x04arl;\x03⇃\x04arr;\x03⇂", // divideontimes;[⋇] diamondsuit;[♦] diamond;[⋄] digamma;[ϝ] divide;[÷] divonx;[⋇] diams;[♦] disin;[⋲] divide[÷] diam;[⋄] die;[¨] div;[÷]. "\x0cvideontimes;\x03⋇\x0aamondsuit;\x03♦\x06amond;\x03⋄\x06gamma;\x02ϝ\x05vide;\x02÷\x05vonx;\x03⋇\x04ams;\x03♦\x04sin;\x03⋲\x04vide\x02÷\x03am;\x03⋄\x02e;\x02¨\x02v;\x02÷", // djcy;[ђ]. "\x03cy;\x02ђ", // dlcorn;[⌞] dlcrop;[⌍]. "\x05corn;\x03⌞\x05crop;\x03⌍", // downharpoonright;[⇂] downharpoonleft;[⇃] doublebarwedge;[⌆] downdownarrows;[⇊] dotsquare;[⊡] downarrow;[↓] doteqdot;[≑] dotminus;[∸] dotplus;[∔] dollar;[$] doteq;[≐] dopf;[𝕕] dot;[˙]. "\x0fwnharpoonright;\x03⇂\x0ewnharpoonleft;\x03⇃\x0dublebarwedge;\x03⌆\x0dwndownarrows;\x03⇊\x08tsquare;\x03⊡\x08wnarrow;\x03↓\x07teqdot;\x03≑\x07tminus;\x03∸\x06tplus;\x03∔\x05llar;\x01$\x04teq;\x03≐\x03pf;\x04𝕕\x02t;\x02˙", // drbkarow;[⤐] drcorn;[⌟] drcrop;[⌌]. "\x07bkarow;\x03⤐\x05corn;\x03⌟\x05crop;\x03⌌", // dstrok;[đ] dscr;[𝒹] dscy;[ѕ] dsol;[⧶]. "\x05trok;\x02đ\x03cr;\x04𝒹\x03cy;\x02ѕ\x03ol;\x03⧶", // dtdot;[⋱] dtrif;[▾] dtri;[▿]. "\x04dot;\x03⋱\x04rif;\x03▾\x03ri;\x03▿", // duarr;[⇵] duhar;[⥯]. "\x04arr;\x03⇵\x04har;\x03⥯", // dwangle;[⦦]. "\x06angle;\x03⦦", // dzigrarr;[⟿] dzcy;[џ]. "\x07igrarr;\x03⟿\x03cy;\x02џ", // eDDot;[⩷] eDot;[≑]. "\x04Dot;\x03⩷\x03ot;\x03≑", // eacute;[é] easter;[⩮] eacute[é]. "\x05cute;\x02é\x05ster;\x03⩮\x04cute\x02é", // ecaron;[ě] ecolon;[≕] ecirc;[ê] ecir;[≖] ecirc[ê] ecy;[э]. "\x05aron;\x02ě\x05olon;\x03≕\x04irc;\x02ê\x03ir;\x03≖\x03irc\x02ê\x02y;\x02э", // edot;[ė]. "\x03ot;\x02ė", // ee;[ⅇ]. "\x01;\x03ⅇ", // efDot;[≒] efr;[𝔢]. "\x04Dot;\x03≒\x02r;\x04𝔢", // egrave;[è] egsdot;[⪘] egrave[è] egs;[⪖] eg;[⪚]. 
"\x05rave;\x02è\x05sdot;\x03⪘\x04rave\x02è\x02s;\x03⪖\x01;\x03⪚", // elinters;[⏧] elsdot;[⪗] ell;[ℓ] els;[⪕] el;[⪙]. "\x07inters;\x03⏧\x05sdot;\x03⪗\x02l;\x03ℓ\x02s;\x03⪕\x01;\x03⪙", // emptyset;[∅] emptyv;[∅] emsp13;[ ] emsp14;[ ] emacr;[ē] empty;[∅] emsp;[ ]. "\x07ptyset;\x03∅\x05ptyv;\x03∅\x05sp13;\x03 \x05sp14;\x03 \x04acr;\x02ē\x04pty;\x03∅\x03sp;\x03 ", // ensp;[ ] eng;[ŋ]. "\x03sp;\x03 \x02g;\x02ŋ", // eogon;[ę] eopf;[𝕖]. "\x04gon;\x02ę\x03pf;\x04𝕖", // epsilon;[ε] eparsl;[⧣] eplus;[⩱] epsiv;[ϵ] epar;[⋕] epsi;[ε]. "\x06silon;\x02ε\x05arsl;\x03⧣\x04lus;\x03⩱\x04siv;\x02ϵ\x03ar;\x03⋕\x03si;\x02ε", // eqslantless;[⪕] eqslantgtr;[⪖] eqvparsl;[⧥] eqcolon;[≕] equivDD;[⩸] eqcirc;[≖] equals;[=] equest;[≟] eqsim;[≂] equiv;[≡]. "\x0aslantless;\x03⪕\x09slantgtr;\x03⪖\x07vparsl;\x03⧥\x06colon;\x03≕\x06uivDD;\x03⩸\x05circ;\x03≖\x05uals;\x01=\x05uest;\x03≟\x04sim;\x03≂\x04uiv;\x03≡", // erDot;[≓] erarr;[⥱]. "\x04Dot;\x03≓\x04arr;\x03⥱", // esdot;[≐] escr;[ℯ] esim;[≂]. "\x04dot;\x03≐\x03cr;\x03ℯ\x03im;\x03≂", // eta;[η] eth;[ð] eth[ð]. "\x02a;\x02η\x02h;\x02ð\x01h\x02ð", // euml;[ë] euro;[€] euml[ë]. "\x03ml;\x02ë\x03ro;\x03€\x02ml\x02ë", // exponentiale;[ⅇ] expectation;[ℰ] exist;[∃] excl;[!]. "\x0bponentiale;\x03ⅇ\x0apectation;\x03ℰ\x04ist;\x03∃\x03cl;\x01!", // fallingdotseq;[≒]. "\x0cllingdotseq;\x03≒", // fcy;[ф]. "\x02y;\x02ф", // female;[♀]. "\x05male;\x03♀", // ffilig;[ffi] ffllig;[ffl] fflig;[ff] ffr;[𝔣]. "\x05ilig;\x03ffi\x05llig;\x03ffl\x04lig;\x03ff\x02r;\x04𝔣", // filig;[fi]. "\x04lig;\x03fi", // fjlig;[fj]. "\x04lig;\x02fj", // fllig;[fl] fltns;[▱] flat;[♭]. "\x04lig;\x03fl\x04tns;\x03▱\x03at;\x03♭", // fnof;[ƒ]. "\x03of;\x02ƒ", // forall;[∀] forkv;[⫙] fopf;[𝕗] fork;[⋔]. "\x05rall;\x03∀\x04rkv;\x03⫙\x03pf;\x04𝕗\x03rk;\x03⋔", // fpartint;[⨍]. 
"\x07artint;\x03⨍", // frac12;[½] frac13;[⅓] frac14;[¼] frac15;[⅕] frac16;[⅙] frac18;[⅛] frac23;[⅔] frac25;[⅖] frac34;[¾] frac35;[⅗] frac38;[⅜] frac45;[⅘] frac56;[⅚] frac58;[⅝] frac78;[⅞] frac12[½] frac14[¼] frac34[¾] frasl;[⁄] frown;[⌢]. "\x05ac12;\x02½\x05ac13;\x03⅓\x05ac14;\x02¼\x05ac15;\x03⅕\x05ac16;\x03⅙\x05ac18;\x03⅛\x05ac23;\x03⅔\x05ac25;\x03⅖\x05ac34;\x02¾\x05ac35;\x03⅗\x05ac38;\x03⅜\x05ac45;\x03⅘\x05ac56;\x03⅚\x05ac58;\x03⅝\x05ac78;\x03⅞\x04ac12\x02½\x04ac14\x02¼\x04ac34\x02¾\x04asl;\x03⁄\x04own;\x03⌢", // fscr;[𝒻]. "\x03cr;\x04𝒻", // gEl;[⪌] gE;[≧]. "\x02l;\x03⪌\x01;\x03≧", // gacute;[ǵ] gammad;[ϝ] gamma;[γ] gap;[⪆]. "\x05cute;\x02ǵ\x05mmad;\x02ϝ\x04mma;\x02γ\x02p;\x03⪆", // gbreve;[ğ]. "\x05reve;\x02ğ", // gcirc;[ĝ] gcy;[г]. "\x04irc;\x02ĝ\x02y;\x02г", // gdot;[ġ]. "\x03ot;\x02ġ", // geqslant;[⩾] gesdotol;[⪄] gesdoto;[⪂] gesdot;[⪀] gesles;[⪔] gescc;[⪩] geqq;[≧] gesl;[⋛︀] gel;[⋛] geq;[≥] ges;[⩾] ge;[≥]. "\x07qslant;\x03⩾\x07sdotol;\x03⪄\x06sdoto;\x03⪂\x05sdot;\x03⪀\x05sles;\x03⪔\x04scc;\x03⪩\x03qq;\x03≧\x03sl;\x06⋛︀\x02l;\x03⋛\x02q;\x03≥\x02s;\x03⩾\x01;\x03≥", // gfr;[𝔤]. "\x02r;\x04𝔤", // ggg;[⋙] gg;[≫]. "\x02g;\x03⋙\x01;\x03≫", // gimel;[ℷ]. "\x04mel;\x03ℷ", // gjcy;[ѓ]. "\x03cy;\x02ѓ", // glE;[⪒] gla;[⪥] glj;[⪤] gl;[≷]. "\x02E;\x03⪒\x02a;\x03⪥\x02j;\x03⪤\x01;\x03≷", // gnapprox;[⪊] gneqq;[≩] gnsim;[⋧] gnap;[⪊] gneq;[⪈] gnE;[≩] gne;[⪈]. "\x07approx;\x03⪊\x04eqq;\x03≩\x04sim;\x03⋧\x03ap;\x03⪊\x03eq;\x03⪈\x02E;\x03≩\x02e;\x03⪈", // gopf;[𝕘]. "\x03pf;\x04𝕘", // grave;[`]. "\x04ave;\x01`", // gsime;[⪎] gsiml;[⪐] gscr;[ℊ] gsim;[≳]. "\x04ime;\x03⪎\x04iml;\x03⪐\x03cr;\x03ℊ\x03im;\x03≳", // gtreqqless;[⪌] gtrapprox;[⪆] gtreqless;[⋛] gtquest;[⩼] gtrless;[≷] gtlPar;[⦕] gtrarr;[⥸] gtrdot;[⋗] gtrsim;[≳] gtcir;[⩺] gtdot;[⋗] gtcc;[⪧] gt;[>]. "\x09reqqless;\x03⪌\x08rapprox;\x03⪆\x08reqless;\x03⋛\x06quest;\x03⩼\x06rless;\x03≷\x05lPar;\x03⦕\x05rarr;\x03⥸\x05rdot;\x03⋗\x05rsim;\x03≳\x04cir;\x03⩺\x04dot;\x03⋗\x03cc;\x03⪧\x01;\x01>", // gvertneqq;[≩︀] gvnE;[≩︀]. 
"\x08ertneqq;\x06≩︀\x03nE;\x06≩︀", // hArr;[⇔]. "\x03rr;\x03⇔", // harrcir;[⥈] hairsp;[ ] hamilt;[ℋ] hardcy;[ъ] harrw;[↭] half;[½] harr;[↔]. "\x06rrcir;\x03⥈\x05irsp;\x03 \x05milt;\x03ℋ\x05rdcy;\x02ъ\x04rrw;\x03↭\x03lf;\x02½\x03rr;\x03↔", // hbar;[ℏ]. "\x03ar;\x03ℏ", // hcirc;[ĥ]. "\x04irc;\x02ĥ", // heartsuit;[♥] hearts;[♥] hellip;[…] hercon;[⊹]. "\x08artsuit;\x03♥\x05arts;\x03♥\x05llip;\x03…\x05rcon;\x03⊹", // hfr;[𝔥]. "\x02r;\x04𝔥", // hksearow;[⤥] hkswarow;[⤦]. "\x07searow;\x03⤥\x07swarow;\x03⤦", // hookrightarrow;[↪] hookleftarrow;[↩] homtht;[∻] horbar;[―] hoarr;[⇿] hopf;[𝕙]. "\x0dokrightarrow;\x03↪\x0cokleftarrow;\x03↩\x05mtht;\x03∻\x05rbar;\x03―\x04arr;\x03⇿\x03pf;\x04𝕙", // hslash;[ℏ] hstrok;[ħ] hscr;[𝒽]. "\x05lash;\x03ℏ\x05trok;\x02ħ\x03cr;\x04𝒽", // hybull;[⁃] hyphen;[‐]. "\x05bull;\x03⁃\x05phen;\x03‐", // iacute;[í] iacute[í]. "\x05cute;\x02í\x04cute\x02í", // icirc;[î] icirc[î] icy;[и] ic;[⁣]. "\x04irc;\x02î\x03irc\x02î\x02y;\x02и\x01;\x03⁣", // iexcl;[¡] iecy;[е] iexcl[¡]. "\x04xcl;\x02¡\x03cy;\x02е\x03xcl\x02¡", // iff;[⇔] ifr;[𝔦]. "\x02f;\x03⇔\x02r;\x04𝔦", // igrave;[ì] igrave[ì]. "\x05rave;\x02ì\x04rave\x02ì", // iiiint;[⨌] iinfin;[⧜] iiint;[∭] iiota;[℩] ii;[ⅈ]. "\x05iint;\x03⨌\x05nfin;\x03⧜\x04int;\x03∭\x04ota;\x03℩\x01;\x03ⅈ", // ijlig;[ij]. "\x04lig;\x02ij", // imagline;[ℐ] imagpart;[ℑ] imacr;[ī] image;[ℑ] imath;[ı] imped;[Ƶ] imof;[⊷]. "\x07agline;\x03ℐ\x07agpart;\x03ℑ\x04acr;\x02ī\x04age;\x03ℑ\x04ath;\x02ı\x04ped;\x02Ƶ\x03of;\x03⊷", // infintie;[⧝] integers;[ℤ] intercal;[⊺] intlarhk;[⨗] intprod;[⨼] incare;[℅] inodot;[ı] intcal;[⊺] infin;[∞] int;[∫] in;[∈]. "\x07fintie;\x03⧝\x07tegers;\x03ℤ\x07tercal;\x03⊺\x07tlarhk;\x03⨗\x06tprod;\x03⨼\x05care;\x03℅\x05odot;\x02ı\x05tcal;\x03⊺\x04fin;\x03∞\x02t;\x03∫\x01;\x03∈", // iogon;[į] iocy;[ё] iopf;[𝕚] iota;[ι]. "\x04gon;\x02į\x03cy;\x02ё\x03pf;\x04𝕚\x03ta;\x02ι", // iprod;[⨼]. "\x04rod;\x03⨼", // iquest;[¿] iquest[¿]. 
"\x05uest;\x02¿\x04uest\x02¿", // isindot;[⋵] isinsv;[⋳] isinE;[⋹] isins;[⋴] isinv;[∈] iscr;[𝒾] isin;[∈]. "\x06indot;\x03⋵\x05insv;\x03⋳\x04inE;\x03⋹\x04ins;\x03⋴\x04inv;\x03∈\x03cr;\x04𝒾\x03in;\x03∈", // itilde;[ĩ] it;[⁢]. "\x05ilde;\x02ĩ\x01;\x03⁢", // iukcy;[і] iuml;[ï] iuml[ï]. "\x04kcy;\x02і\x03ml;\x02ï\x02ml\x02ï", // jcirc;[ĵ] jcy;[й]. "\x04irc;\x02ĵ\x02y;\x02й", // jfr;[𝔧]. "\x02r;\x04𝔧", // jmath;[ȷ]. "\x04ath;\x02ȷ", // jopf;[𝕛]. "\x03pf;\x04𝕛", // jsercy;[ј] jscr;[𝒿]. "\x05ercy;\x02ј\x03cr;\x04𝒿", // jukcy;[є]. "\x04kcy;\x02є", // kappav;[ϰ] kappa;[κ]. "\x05ppav;\x02ϰ\x04ppa;\x02κ", // kcedil;[ķ] kcy;[к]. "\x05edil;\x02ķ\x02y;\x02к", // kfr;[𝔨]. "\x02r;\x04𝔨", // kgreen;[ĸ]. "\x05reen;\x02ĸ", // khcy;[х]. "\x03cy;\x02х", // kjcy;[ќ]. "\x03cy;\x02ќ", // kopf;[𝕜]. "\x03pf;\x04𝕜", // kscr;[𝓀]. "\x03cr;\x04𝓀", // lAtail;[⤛] lAarr;[⇚] lArr;[⇐]. "\x05tail;\x03⤛\x04arr;\x03⇚\x03rr;\x03⇐", // lBarr;[⤎]. "\x04arr;\x03⤎", // lEg;[⪋] lE;[≦]. "\x02g;\x03⪋\x01;\x03≦", // lHar;[⥢]. "\x03ar;\x03⥢", // laemptyv;[⦴] larrbfs;[⤟] larrsim;[⥳] lacute;[ĺ] lagran;[ℒ] lambda;[λ] langle;[⟨] larrfs;[⤝] larrhk;[↩] larrlp;[↫] larrpl;[⤹] larrtl;[↢] latail;[⤙] langd;[⦑] laquo;[«] larrb;[⇤] lates;[⪭︀] lang;[⟨] laquo[«] larr;[←] late;[⪭] lap;[⪅] lat;[⪫]. "\x07emptyv;\x03⦴\x06rrbfs;\x03⤟\x06rrsim;\x03⥳\x05cute;\x02ĺ\x05gran;\x03ℒ\x05mbda;\x02λ\x05ngle;\x03⟨\x05rrfs;\x03⤝\x05rrhk;\x03↩\x05rrlp;\x03↫\x05rrpl;\x03⤹\x05rrtl;\x03↢\x05tail;\x03⤙\x04ngd;\x03⦑\x04quo;\x02«\x04rrb;\x03⇤\x04tes;\x06⪭︀\x03ng;\x03⟨\x03quo\x02«\x03rr;\x03←\x03te;\x03⪭\x02p;\x03⪅\x02t;\x03⪫", // lbrksld;[⦏] lbrkslu;[⦍] lbrace;[{] lbrack;[[] lbarr;[⤌] lbbrk;[❲] lbrke;[⦋]. "\x06rksld;\x03⦏\x06rkslu;\x03⦍\x05race;\x01{\x05rack;\x01[\x04arr;\x03⤌\x04brk;\x03❲\x04rke;\x03⦋", // lcaron;[ľ] lcedil;[ļ] lceil;[⌈] lcub;[{] lcy;[л]. "\x05aron;\x02ľ\x05edil;\x02ļ\x04eil;\x03⌈\x03ub;\x01{\x02y;\x02л", // ldrushar;[⥋] ldrdhar;[⥧] ldquor;[„] ldquo;[“] ldca;[⤶] ldsh;[↲]. 
"\x07rushar;\x03⥋\x06rdhar;\x03⥧\x05quor;\x03„\x04quo;\x03“\x03ca;\x03⤶\x03sh;\x03↲", // leftrightsquigarrow;[↭] leftrightharpoons;[⇋] leftharpoondown;[↽] leftrightarrows;[⇆] leftleftarrows;[⇇] leftrightarrow;[↔] leftthreetimes;[⋋] leftarrowtail;[↢] leftharpoonup;[↼] lessapprox;[⪅] lesseqqgtr;[⪋] leftarrow;[←] lesseqgtr;[⋚] leqslant;[⩽] lesdotor;[⪃] lesdoto;[⪁] lessdot;[⋖] lessgtr;[≶] lesssim;[≲] lesdot;[⩿] lesges;[⪓] lescc;[⪨] leqq;[≦] lesg;[⋚︀] leg;[⋚] leq;[≤] les;[⩽] le;[≤]. "\x12ftrightsquigarrow;\x03↭\x10ftrightharpoons;\x03⇋\x0eftharpoondown;\x03↽\x0eftrightarrows;\x03⇆\x0dftleftarrows;\x03⇇\x0dftrightarrow;\x03↔\x0dftthreetimes;\x03⋋\x0cftarrowtail;\x03↢\x0cftharpoonup;\x03↼\x09ssapprox;\x03⪅\x09sseqqgtr;\x03⪋\x08ftarrow;\x03←\x08sseqgtr;\x03⋚\x07qslant;\x03⩽\x07sdotor;\x03⪃\x06sdoto;\x03⪁\x06ssdot;\x03⋖\x06ssgtr;\x03≶\x06sssim;\x03≲\x05sdot;\x03⩿\x05sges;\x03⪓\x04scc;\x03⪨\x03qq;\x03≦\x03sg;\x06⋚︀\x02g;\x03⋚\x02q;\x03≤\x02s;\x03⩽\x01;\x03≤", // lfisht;[⥼] lfloor;[⌊] lfr;[𝔩]. "\x05isht;\x03⥼\x05loor;\x03⌊\x02r;\x04𝔩", // lgE;[⪑] lg;[≶]. "\x02E;\x03⪑\x01;\x03≶", // lharul;[⥪] lhard;[↽] lharu;[↼] lhblk;[▄]. "\x05arul;\x03⥪\x04ard;\x03↽\x04aru;\x03↼\x04blk;\x03▄", // ljcy;[љ]. "\x03cy;\x02љ", // llcorner;[⌞] llhard;[⥫] llarr;[⇇] lltri;[◺] ll;[≪]. "\x07corner;\x03⌞\x05hard;\x03⥫\x04arr;\x03⇇\x04tri;\x03◺\x01;\x03≪", // lmoustache;[⎰] lmidot;[ŀ] lmoust;[⎰]. "\x09oustache;\x03⎰\x05idot;\x02ŀ\x05oust;\x03⎰", // lnapprox;[⪉] lneqq;[≨] lnsim;[⋦] lnap;[⪉] lneq;[⪇] lnE;[≨] lne;[⪇]. "\x07approx;\x03⪉\x04eqq;\x03≨\x04sim;\x03⋦\x03ap;\x03⪉\x03eq;\x03⪇\x02E;\x03≨\x02e;\x03⪇", // longleftrightarrow;[⟷] longrightarrow;[⟶] looparrowright;[↬] longleftarrow;[⟵] looparrowleft;[↫] longmapsto;[⟼] lotimes;[⨴] lozenge;[◊] loplus;[⨭] lowast;[∗] lowbar;[_] loang;[⟬] loarr;[⇽] lobrk;[⟦] lopar;[⦅] lopf;[𝕝] lozf;[⧫] loz;[◊]. 
"\x11ngleftrightarrow;\x03⟷\x0dngrightarrow;\x03⟶\x0doparrowright;\x03↬\x0cngleftarrow;\x03⟵\x0coparrowleft;\x03↫\x09ngmapsto;\x03⟼\x06times;\x03⨴\x06zenge;\x03◊\x05plus;\x03⨭\x05wast;\x03∗\x05wbar;\x01_\x04ang;\x03⟬\x04arr;\x03⇽\x04brk;\x03⟦\x04par;\x03⦅\x03pf;\x04𝕝\x03zf;\x03⧫\x02z;\x03◊", // lparlt;[⦓] lpar;[(]. "\x05arlt;\x03⦓\x03ar;\x01(", // lrcorner;[⌟] lrhard;[⥭] lrarr;[⇆] lrhar;[⇋] lrtri;[⊿] lrm;[‎]. "\x07corner;\x03⌟\x05hard;\x03⥭\x04arr;\x03⇆\x04har;\x03⇋\x04tri;\x03⊿\x02m;\x03‎", // lsaquo;[‹] lsquor;[‚] lstrok;[ł] lsime;[⪍] lsimg;[⪏] lsquo;[‘] lscr;[𝓁] lsim;[≲] lsqb;[[] lsh;[↰]. "\x05aquo;\x03‹\x05quor;\x03‚\x05trok;\x02ł\x04ime;\x03⪍\x04img;\x03⪏\x04quo;\x03‘\x03cr;\x04𝓁\x03im;\x03≲\x03qb;\x01[\x02h;\x03↰", // ltquest;[⩻] lthree;[⋋] ltimes;[⋉] ltlarr;[⥶] ltrPar;[⦖] ltcir;[⩹] ltdot;[⋖] ltrie;[⊴] ltrif;[◂] ltcc;[⪦] ltri;[◃] lt;[<]. "\x06quest;\x03⩻\x05hree;\x03⋋\x05imes;\x03⋉\x05larr;\x03⥶\x05rPar;\x03⦖\x04cir;\x03⩹\x04dot;\x03⋖\x04rie;\x03⊴\x04rif;\x03◂\x03cc;\x03⪦\x03ri;\x03◃\x01;\x01<", // lurdshar;[⥊] luruhar;[⥦]. "\x07rdshar;\x03⥊\x06ruhar;\x03⥦", // lvertneqq;[≨︀] lvnE;[≨︀]. "\x08ertneqq;\x06≨︀\x03nE;\x06≨︀", // mDDot;[∺]. "\x04Dot;\x03∺", // mapstodown;[↧] mapstoleft;[↤] mapstoup;[↥] maltese;[✠] mapsto;[↦] marker;[▮] macr;[¯] male;[♂] malt;[✠] macr[¯] map;[↦]. "\x09pstodown;\x03↧\x09pstoleft;\x03↤\x07pstoup;\x03↥\x06ltese;\x03✠\x05psto;\x03↦\x05rker;\x03▮\x03cr;\x02¯\x03le;\x03♂\x03lt;\x03✠\x02cr\x02¯\x02p;\x03↦", // mcomma;[⨩] mcy;[м]. "\x05omma;\x03⨩\x02y;\x02м", // mdash;[—]. "\x04ash;\x03—", // measuredangle;[∡]. "\x0casuredangle;\x03∡", // mfr;[𝔪]. "\x02r;\x04𝔪", // mho;[℧]. "\x02o;\x03℧", // minusdu;[⨪] midast;[*] midcir;[⫰] middot;[·] minusb;[⊟] minusd;[∸] micro;[µ] middot[·] minus;[−] micro[µ] mid;[∣]. "\x06nusdu;\x03⨪\x05dast;\x01*\x05dcir;\x03⫰\x05ddot;\x02·\x05nusb;\x03⊟\x05nusd;\x03∸\x04cro;\x02µ\x04ddot\x02·\x04nus;\x03−\x03cro\x02µ\x02d;\x03∣", // mlcp;[⫛] mldr;[…]. "\x03cp;\x03⫛\x03dr;\x03…", // mnplus;[∓]. 
"\x05plus;\x03∓", // models;[⊧] mopf;[𝕞]. "\x05dels;\x03⊧\x03pf;\x04𝕞", // mp;[∓]. "\x01;\x03∓", // mstpos;[∾] mscr;[𝓂]. "\x05tpos;\x03∾\x03cr;\x04𝓂", // multimap;[⊸] mumap;[⊸] mu;[μ]. "\x07ltimap;\x03⊸\x04map;\x03⊸\x01;\x02μ", // nGtv;[≫̸] nGg;[⋙̸] nGt;[≫⃒]. "\x03tv;\x05≫̸\x02g;\x05⋙̸\x02t;\x06≫⃒", // nLeftrightarrow;[⇎] nLeftarrow;[⇍] nLtv;[≪̸] nLl;[⋘̸] nLt;[≪⃒]. "\x0eeftrightarrow;\x03⇎\x09eftarrow;\x03⇍\x03tv;\x05≪̸\x02l;\x05⋘̸\x02t;\x06≪⃒", // nRightarrow;[⇏]. "\x0aightarrow;\x03⇏", // nVDash;[⊯] nVdash;[⊮]. "\x05Dash;\x03⊯\x05dash;\x03⊮", // naturals;[ℕ] napprox;[≉] natural;[♮] nacute;[ń] nabla;[∇] napid;[≋̸] napos;[ʼn] natur;[♮] nang;[∠⃒] napE;[⩰̸] nap;[≉]. "\x07turals;\x03ℕ\x06pprox;\x03≉\x06tural;\x03♮\x05cute;\x02ń\x04bla;\x03∇\x04pid;\x05≋̸\x04pos;\x02ʼn\x04tur;\x03♮\x03ng;\x06∠⃒\x03pE;\x05⩰̸\x02p;\x03≉", // nbumpe;[≏̸] nbump;[≎̸] nbsp;[ ] nbsp[ ]. "\x05umpe;\x05≏̸\x04ump;\x05≎̸\x03sp;\x02 \x02sp\x02 ", // ncongdot;[⩭̸] ncaron;[ň] ncedil;[ņ] ncong;[≇] ncap;[⩃] ncup;[⩂] ncy;[н]. "\x07ongdot;\x05⩭̸\x05aron;\x02ň\x05edil;\x02ņ\x04ong;\x03≇\x03ap;\x03⩃\x03up;\x03⩂\x02y;\x02н", // ndash;[–]. "\x04ash;\x03–", // nearrow;[↗] nexists;[∄] nearhk;[⤤] nequiv;[≢] nesear;[⤨] nexist;[∄] neArr;[⇗] nearr;[↗] nedot;[≐̸] nesim;[≂̸] ne;[≠]. "\x06arrow;\x03↗\x06xists;\x03∄\x05arhk;\x03⤤\x05quiv;\x03≢\x05sear;\x03⤨\x05xist;\x03∄\x04Arr;\x03⇗\x04arr;\x03↗\x04dot;\x05≐̸\x04sim;\x05≂̸\x01;\x03≠", // nfr;[𝔫]. "\x02r;\x04𝔫", // ngeqslant;[⩾̸] ngeqq;[≧̸] ngsim;[≵] ngeq;[≱] nges;[⩾̸] ngtr;[≯] ngE;[≧̸] nge;[≱] ngt;[≯]. "\x08eqslant;\x05⩾̸\x04eqq;\x05≧̸\x04sim;\x03≵\x03eq;\x03≱\x03es;\x05⩾̸\x03tr;\x03≯\x02E;\x05≧̸\x02e;\x03≱\x02t;\x03≯", // nhArr;[⇎] nharr;[↮] nhpar;[⫲]. "\x04Arr;\x03⇎\x04arr;\x03↮\x04par;\x03⫲", // nisd;[⋺] nis;[⋼] niv;[∋] ni;[∋]. "\x03sd;\x03⋺\x02s;\x03⋼\x02v;\x03∋\x01;\x03∋", // njcy;[њ]. 
"\x03cy;\x02њ", // nleftrightarrow;[↮] nleftarrow;[↚] nleqslant;[⩽̸] nltrie;[⋬] nlArr;[⇍] nlarr;[↚] nleqq;[≦̸] nless;[≮] nlsim;[≴] nltri;[⋪] nldr;[‥] nleq;[≰] nles;[⩽̸] nlE;[≦̸] nle;[≰] nlt;[≮]. "\x0eeftrightarrow;\x03↮\x09eftarrow;\x03↚\x08eqslant;\x05⩽̸\x05trie;\x03⋬\x04Arr;\x03⇍\x04arr;\x03↚\x04eqq;\x05≦̸\x04ess;\x03≮\x04sim;\x03≴\x04tri;\x03⋪\x03dr;\x03‥\x03eq;\x03≰\x03es;\x05⩽̸\x02E;\x05≦̸\x02e;\x03≰\x02t;\x03≮", // nmid;[∤]. "\x03id;\x03∤", // notindot;[⋵̸] notinva;[∉] notinvb;[⋷] notinvc;[⋶] notniva;[∌] notnivb;[⋾] notnivc;[⋽] notinE;[⋹̸] notin;[∉] notni;[∌] nopf;[𝕟] not;[¬] not[¬]. "\x07tindot;\x05⋵̸\x06tinva;\x03∉\x06tinvb;\x03⋷\x06tinvc;\x03⋶\x06tniva;\x03∌\x06tnivb;\x03⋾\x06tnivc;\x03⋽\x05tinE;\x05⋹̸\x04tin;\x03∉\x04tni;\x03∌\x03pf;\x04𝕟\x02t;\x02¬\x01t\x02¬", // nparallel;[∦] npolint;[⨔] npreceq;[⪯̸] nparsl;[⫽⃥] nprcue;[⋠] npart;[∂̸] nprec;[⊀] npar;[∦] npre;[⪯̸] npr;[⊀]. "\x08arallel;\x03∦\x06olint;\x03⨔\x06receq;\x05⪯̸\x05arsl;\x06⫽⃥\x05rcue;\x03⋠\x04art;\x05∂̸\x04rec;\x03⊀\x03ar;\x03∦\x03re;\x05⪯̸\x02r;\x03⊀", // nrightarrow;[↛] nrarrc;[⤳̸] nrarrw;[↝̸] nrtrie;[⋭] nrArr;[⇏] nrarr;[↛] nrtri;[⋫]. "\x0aightarrow;\x03↛\x05arrc;\x05⤳̸\x05arrw;\x05↝̸\x05trie;\x03⋭\x04Arr;\x03⇏\x04arr;\x03↛\x04tri;\x03⋫", // nshortparallel;[∦] nsubseteqq;[⫅̸] nsupseteqq;[⫆̸] nshortmid;[∤] nsubseteq;[⊈] nsupseteq;[⊉] nsqsube;[⋢] nsqsupe;[⋣] nsubset;[⊂⃒] nsucceq;[⪰̸] nsupset;[⊃⃒] nsccue;[⋡] nsimeq;[≄] nsime;[≄] nsmid;[∤] nspar;[∦] nsubE;[⫅̸] nsube;[⊈] nsucc;[⊁] nsupE;[⫆̸] nsupe;[⊉] nsce;[⪰̸] nscr;[𝓃] nsim;[≁] nsub;[⊄] nsup;[⊅] nsc;[⊁]. 
"\x0dhortparallel;\x03∦\x09ubseteqq;\x05⫅̸\x09upseteqq;\x05⫆̸\x08hortmid;\x03∤\x08ubseteq;\x03⊈\x08upseteq;\x03⊉\x06qsube;\x03⋢\x06qsupe;\x03⋣\x06ubset;\x06⊂⃒\x06ucceq;\x05⪰̸\x06upset;\x06⊃⃒\x05ccue;\x03⋡\x05imeq;\x03≄\x04ime;\x03≄\x04mid;\x03∤\x04par;\x03∦\x04ubE;\x05⫅̸\x04ube;\x03⊈\x04ucc;\x03⊁\x04upE;\x05⫆̸\x04upe;\x03⊉\x03ce;\x05⪰̸\x03cr;\x04𝓃\x03im;\x03≁\x03ub;\x03⊄\x03up;\x03⊅\x02c;\x03⊁", // ntrianglerighteq;[⋭] ntrianglelefteq;[⋬] ntriangleright;[⋫] ntriangleleft;[⋪] ntilde;[ñ] ntilde[ñ] ntgl;[≹] ntlg;[≸]. "\x0frianglerighteq;\x03⋭\x0erianglelefteq;\x03⋬\x0driangleright;\x03⋫\x0criangleleft;\x03⋪\x05ilde;\x02ñ\x04ilde\x02ñ\x03gl;\x03≹\x03lg;\x03≸", // numero;[№] numsp;[ ] num;[#] nu;[ν]. "\x05mero;\x03№\x04msp;\x03 \x02m;\x01#\x01;\x02ν", // nvinfin;[⧞] nvltrie;[⊴⃒] nvrtrie;[⊵⃒] nvDash;[⊭] nvHarr;[⤄] nvdash;[⊬] nvlArr;[⤂] nvrArr;[⤃] nvsim;[∼⃒] nvap;[≍⃒] nvge;[≥⃒] nvgt;[>⃒] nvle;[≤⃒] nvlt;[<⃒]. "\x06infin;\x03⧞\x06ltrie;\x06⊴⃒\x06rtrie;\x06⊵⃒\x05Dash;\x03⊭\x05Harr;\x03⤄\x05dash;\x03⊬\x05lArr;\x03⤂\x05rArr;\x03⤃\x04sim;\x06∼⃒\x03ap;\x06≍⃒\x03ge;\x06≥⃒\x03gt;\x04>⃒\x03le;\x06≤⃒\x03lt;\x04<⃒", // nwarrow;[↖] nwarhk;[⤣] nwnear;[⤧] nwArr;[⇖] nwarr;[↖]. "\x06arrow;\x03↖\x05arhk;\x03⤣\x05near;\x03⤧\x04Arr;\x03⇖\x04arr;\x03↖", // oS;[Ⓢ]. "\x01;\x03Ⓢ", // oacute;[ó] oacute[ó] oast;[⊛]. "\x05cute;\x02ó\x04cute\x02ó\x03st;\x03⊛", // ocirc;[ô] ocir;[⊚] ocirc[ô] ocy;[о]. "\x04irc;\x02ô\x03ir;\x03⊚\x03irc\x02ô\x02y;\x02о", // odblac;[ő] odsold;[⦼] odash;[⊝] odiv;[⨸] odot;[⊙]. "\x05blac;\x02ő\x05sold;\x03⦼\x04ash;\x03⊝\x03iv;\x03⨸\x03ot;\x03⊙", // oelig;[œ]. "\x04lig;\x02œ", // ofcir;[⦿] ofr;[𝔬]. "\x04cir;\x03⦿\x02r;\x04𝔬", // ograve;[ò] ograve[ò] ogon;[˛] ogt;[⧁]. "\x05rave;\x02ò\x04rave\x02ò\x03on;\x02˛\x02t;\x03⧁", // ohbar;[⦵] ohm;[Ω]. "\x04bar;\x03⦵\x02m;\x02Ω", // oint;[∮]. "\x03nt;\x03∮", // olcross;[⦻] olarr;[↺] olcir;[⦾] oline;[‾] olt;[⧀]. "\x06cross;\x03⦻\x04arr;\x03↺\x04cir;\x03⦾\x04ine;\x03‾\x02t;\x03⧀", // omicron;[ο] ominus;[⊖] omacr;[ō] omega;[ω] omid;[⦶]. 
"\x06icron;\x02ο\x05inus;\x03⊖\x04acr;\x02ō\x04ega;\x02ω\x03id;\x03⦶", // oopf;[𝕠]. "\x03pf;\x04𝕠", // operp;[⦹] oplus;[⊕] opar;[⦷]. "\x04erp;\x03⦹\x04lus;\x03⊕\x03ar;\x03⦷", // orderof;[ℴ] orslope;[⩗] origof;[⊶] orarr;[↻] order;[ℴ] ordf;[ª] ordm;[º] oror;[⩖] ord;[⩝] ordf[ª] ordm[º] orv;[⩛] or;[∨]. "\x06derof;\x03ℴ\x06slope;\x03⩗\x05igof;\x03⊶\x04arr;\x03↻\x04der;\x03ℴ\x03df;\x02ª\x03dm;\x02º\x03or;\x03⩖\x02d;\x03⩝\x02df\x02ª\x02dm\x02º\x02v;\x03⩛\x01;\x03∨", // oslash;[ø] oslash[ø] oscr;[ℴ] osol;[⊘]. "\x05lash;\x02ø\x04lash\x02ø\x03cr;\x03ℴ\x03ol;\x03⊘", // otimesas;[⨶] otilde;[õ] otimes;[⊗] otilde[õ]. "\x07imesas;\x03⨶\x05ilde;\x02õ\x05imes;\x03⊗\x04ilde\x02õ", // ouml;[ö] ouml[ö]. "\x03ml;\x02ö\x02ml\x02ö", // ovbar;[⌽]. "\x04bar;\x03⌽", // parallel;[∥] parsim;[⫳] parsl;[⫽] para;[¶] part;[∂] par;[∥] para[¶]. "\x07rallel;\x03∥\x05rsim;\x03⫳\x04rsl;\x03⫽\x03ra;\x02¶\x03rt;\x03∂\x02r;\x03∥\x02ra\x02¶", // pcy;[п]. "\x02y;\x02п", // pertenk;[‱] percnt;[%] period;[.] permil;[‰] perp;[⊥]. "\x06rtenk;\x03‱\x05rcnt;\x01%\x05riod;\x01.\x05rmil;\x03‰\x03rp;\x03⊥", // pfr;[𝔭]. "\x02r;\x04𝔭", // phmmat;[ℳ] phone;[☎] phiv;[ϕ] phi;[φ]. "\x05mmat;\x03ℳ\x04one;\x03☎\x03iv;\x02ϕ\x02i;\x02φ", // pitchfork;[⋔] piv;[ϖ] pi;[π]. "\x08tchfork;\x03⋔\x02v;\x02ϖ\x01;\x02π", // plusacir;[⨣] planckh;[ℎ] pluscir;[⨢] plussim;[⨦] plustwo;[⨧] planck;[ℏ] plankv;[ℏ] plusdo;[∔] plusdu;[⨥] plusmn;[±] plusb;[⊞] pluse;[⩲] plusmn[±] plus;[+]. "\x07usacir;\x03⨣\x06anckh;\x03ℎ\x06uscir;\x03⨢\x06ussim;\x03⨦\x06ustwo;\x03⨧\x05anck;\x03ℏ\x05ankv;\x03ℏ\x05usdo;\x03∔\x05usdu;\x03⨥\x05usmn;\x02±\x04usb;\x03⊞\x04use;\x03⩲\x04usmn\x02±\x03us;\x01+", // pm;[±]. "\x01;\x02±", // pointint;[⨕] pound;[£] popf;[𝕡] pound[£]. 
"\x07intint;\x03⨕\x04und;\x02£\x03pf;\x04𝕡\x03und\x02£", // preccurlyeq;[≼] precnapprox;[⪹] precapprox;[⪷] precneqq;[⪵] precnsim;[⋨] profalar;[⌮] profline;[⌒] profsurf;[⌓] precsim;[≾] preceq;[⪯] primes;[ℙ] prnsim;[⋨] propto;[∝] prurel;[⊰] prcue;[≼] prime;[′] prnap;[⪹] prsim;[≾] prap;[⪷] prec;[≺] prnE;[⪵] prod;[∏] prop;[∝] prE;[⪳] pre;[⪯] pr;[≺]. "\x0aeccurlyeq;\x03≼\x0aecnapprox;\x03⪹\x09ecapprox;\x03⪷\x07ecneqq;\x03⪵\x07ecnsim;\x03⋨\x07ofalar;\x03⌮\x07ofline;\x03⌒\x07ofsurf;\x03⌓\x06ecsim;\x03≾\x05eceq;\x03⪯\x05imes;\x03ℙ\x05nsim;\x03⋨\x05opto;\x03∝\x05urel;\x03⊰\x04cue;\x03≼\x04ime;\x03′\x04nap;\x03⪹\x04sim;\x03≾\x03ap;\x03⪷\x03ec;\x03≺\x03nE;\x03⪵\x03od;\x03∏\x03op;\x03∝\x02E;\x03⪳\x02e;\x03⪯\x01;\x03≺", // pscr;[𝓅] psi;[ψ]. "\x03cr;\x04𝓅\x02i;\x02ψ", // puncsp;[ ]. "\x05ncsp;\x03 ", // qfr;[𝔮]. "\x02r;\x04𝔮", // qint;[⨌]. "\x03nt;\x03⨌", // qopf;[𝕢]. "\x03pf;\x04𝕢", // qprime;[⁗]. "\x05rime;\x03⁗", // qscr;[𝓆]. "\x03cr;\x04𝓆", // quaternions;[ℍ] quatint;[⨖] questeq;[≟] quest;[?] quot;[\"] quot[\"]. "\x0aaternions;\x03ℍ\x06atint;\x03⨖\x06esteq;\x03≟\x04est;\x01?\x03ot;\x01\"\x02ot\x01\"", // rAtail;[⤜] rAarr;[⇛] rArr;[⇒]. "\x05tail;\x03⤜\x04arr;\x03⇛\x03rr;\x03⇒", // rBarr;[⤏]. "\x04arr;\x03⤏", // rHar;[⥤]. "\x03ar;\x03⥤", // rationals;[ℚ] raemptyv;[⦳] rarrbfs;[⤠] rarrsim;[⥴] racute;[ŕ] rangle;[⟩] rarrap;[⥵] rarrfs;[⤞] rarrhk;[↪] rarrlp;[↬] rarrpl;[⥅] rarrtl;[↣] ratail;[⤚] radic;[√] rangd;[⦒] range;[⦥] raquo;[»] rarrb;[⇥] rarrc;[⤳] rarrw;[↝] ratio;[∶] race;[∽̱] rang;[⟩] raquo[»] rarr;[→]. "\x08tionals;\x03ℚ\x07emptyv;\x03⦳\x06rrbfs;\x03⤠\x06rrsim;\x03⥴\x05cute;\x02ŕ\x05ngle;\x03⟩\x05rrap;\x03⥵\x05rrfs;\x03⤞\x05rrhk;\x03↪\x05rrlp;\x03↬\x05rrpl;\x03⥅\x05rrtl;\x03↣\x05tail;\x03⤚\x04dic;\x03√\x04ngd;\x03⦒\x04nge;\x03⦥\x04quo;\x02»\x04rrb;\x03⇥\x04rrc;\x03⤳\x04rrw;\x03↝\x04tio;\x03∶\x03ce;\x05∽̱\x03ng;\x03⟩\x03quo\x02»\x03rr;\x03→", // rbrksld;[⦎] rbrkslu;[⦐] rbrace;[}] rbrack;[]] rbarr;[⤍] rbbrk;[❳] rbrke;[⦌]. 
"\x06rksld;\x03⦎\x06rkslu;\x03⦐\x05race;\x01}\x05rack;\x01]\x04arr;\x03⤍\x04brk;\x03❳\x04rke;\x03⦌", // rcaron;[ř] rcedil;[ŗ] rceil;[⌉] rcub;[}] rcy;[р]. "\x05aron;\x02ř\x05edil;\x02ŗ\x04eil;\x03⌉\x03ub;\x01}\x02y;\x02р", // rdldhar;[⥩] rdquor;[”] rdquo;[”] rdca;[⤷] rdsh;[↳]. "\x06ldhar;\x03⥩\x05quor;\x03”\x04quo;\x03”\x03ca;\x03⤷\x03sh;\x03↳", // realpart;[ℜ] realine;[ℛ] reals;[ℝ] real;[ℜ] rect;[▭] reg;[®] reg[®]. "\x07alpart;\x03ℜ\x06aline;\x03ℛ\x04als;\x03ℝ\x03al;\x03ℜ\x03ct;\x03▭\x02g;\x02®\x01g\x02®", // rfisht;[⥽] rfloor;[⌋] rfr;[𝔯]. "\x05isht;\x03⥽\x05loor;\x03⌋\x02r;\x04𝔯", // rharul;[⥬] rhard;[⇁] rharu;[⇀] rhov;[ϱ] rho;[ρ]. "\x05arul;\x03⥬\x04ard;\x03⇁\x04aru;\x03⇀\x03ov;\x02ϱ\x02o;\x02ρ", // rightleftharpoons;[⇌] rightharpoondown;[⇁] rightrightarrows;[⇉] rightleftarrows;[⇄] rightsquigarrow;[↝] rightthreetimes;[⋌] rightarrowtail;[↣] rightharpoonup;[⇀] risingdotseq;[≓] rightarrow;[→] ring;[˚]. "\x10ghtleftharpoons;\x03⇌\x0fghtharpoondown;\x03⇁\x0fghtrightarrows;\x03⇉\x0eghtleftarrows;\x03⇄\x0eghtsquigarrow;\x03↝\x0eghtthreetimes;\x03⋌\x0dghtarrowtail;\x03↣\x0dghtharpoonup;\x03⇀\x0bsingdotseq;\x03≓\x09ghtarrow;\x03→\x03ng;\x02˚", // rlarr;[⇄] rlhar;[⇌] rlm;[‏]. "\x04arr;\x03⇄\x04har;\x03⇌\x02m;\x03‏", // rmoustache;[⎱] rmoust;[⎱]. "\x09oustache;\x03⎱\x05oust;\x03⎱", // rnmid;[⫮]. "\x04mid;\x03⫮", // rotimes;[⨵] roplus;[⨮] roang;[⟭] roarr;[⇾] robrk;[⟧] ropar;[⦆] ropf;[𝕣]. "\x06times;\x03⨵\x05plus;\x03⨮\x04ang;\x03⟭\x04arr;\x03⇾\x04brk;\x03⟧\x04par;\x03⦆\x03pf;\x04𝕣", // rppolint;[⨒] rpargt;[⦔] rpar;[)]. "\x07polint;\x03⨒\x05argt;\x03⦔\x03ar;\x01)", // rrarr;[⇉]. "\x04arr;\x03⇉", // rsaquo;[›] rsquor;[’] rsquo;[’] rscr;[𝓇] rsqb;[]] rsh;[↱]. "\x05aquo;\x03›\x05quor;\x03’\x04quo;\x03’\x03cr;\x04𝓇\x03qb;\x01]\x02h;\x03↱", // rtriltri;[⧎] rthree;[⋌] rtimes;[⋊] rtrie;[⊵] rtrif;[▸] rtri;[▹]. "\x07riltri;\x03⧎\x05hree;\x03⋌\x05imes;\x03⋊\x04rie;\x03⊵\x04rif;\x03▸\x03ri;\x03▹", // ruluhar;[⥨]. "\x06luhar;\x03⥨", // rx;[℞]. "\x01;\x03℞", // sacute;[ś]. 
"\x05cute;\x02ś", // sbquo;[‚]. "\x04quo;\x03‚", // scpolint;[⨓] scaron;[š] scedil;[ş] scnsim;[⋩] sccue;[≽] scirc;[ŝ] scnap;[⪺] scsim;[≿] scap;[⪸] scnE;[⪶] scE;[⪴] sce;[⪰] scy;[с] sc;[≻]. "\x07polint;\x03⨓\x05aron;\x02š\x05edil;\x02ş\x05nsim;\x03⋩\x04cue;\x03≽\x04irc;\x02ŝ\x04nap;\x03⪺\x04sim;\x03≿\x03ap;\x03⪸\x03nE;\x03⪶\x02E;\x03⪴\x02e;\x03⪰\x02y;\x02с\x01;\x03≻", // sdotb;[⊡] sdote;[⩦] sdot;[⋅]. "\x04otb;\x03⊡\x04ote;\x03⩦\x03ot;\x03⋅", // setminus;[∖] searrow;[↘] searhk;[⤥] seswar;[⤩] seArr;[⇘] searr;[↘] setmn;[∖] sect;[§] semi;[;] sext;[✶] sect[§]. "\x07tminus;\x03∖\x06arrow;\x03↘\x05arhk;\x03⤥\x05swar;\x03⤩\x04Arr;\x03⇘\x04arr;\x03↘\x04tmn;\x03∖\x03ct;\x02§\x03mi;\x01;\x03xt;\x03✶\x02ct\x02§", // sfrown;[⌢] sfr;[𝔰]. "\x05rown;\x03⌢\x02r;\x04𝔰", // shortparallel;[∥] shortmid;[∣] shchcy;[щ] sharp;[♯] shcy;[ш] shy;[­] shy[­]. "\x0cortparallel;\x03∥\x07ortmid;\x03∣\x05chcy;\x02щ\x04arp;\x03♯\x03cy;\x02ш\x02y;\x02­\x01y\x02­", // simplus;[⨤] simrarr;[⥲] sigmaf;[ς] sigmav;[ς] simdot;[⩪] sigma;[σ] simeq;[≃] simgE;[⪠] simlE;[⪟] simne;[≆] sime;[≃] simg;[⪞] siml;[⪝] sim;[∼]. "\x06mplus;\x03⨤\x06mrarr;\x03⥲\x05gmaf;\x02ς\x05gmav;\x02ς\x05mdot;\x03⩪\x04gma;\x02σ\x04meq;\x03≃\x04mgE;\x03⪠\x04mlE;\x03⪟\x04mne;\x03≆\x03me;\x03≃\x03mg;\x03⪞\x03ml;\x03⪝\x02m;\x03∼", // slarr;[←]. "\x04arr;\x03←", // smallsetminus;[∖] smeparsl;[⧤] smashp;[⨳] smile;[⌣] smtes;[⪬︀] smid;[∣] smte;[⪬] smt;[⪪]. "\x0callsetminus;\x03∖\x07eparsl;\x03⧤\x05ashp;\x03⨳\x04ile;\x03⌣\x04tes;\x06⪬︀\x03id;\x03∣\x03te;\x03⪬\x02t;\x03⪪", // softcy;[ь] solbar;[⌿] solb;[⧄] sopf;[𝕤] sol;[/]. "\x05ftcy;\x02ь\x05lbar;\x03⌿\x03lb;\x03⧄\x03pf;\x04𝕤\x02l;\x01/", // spadesuit;[♠] spades;[♠] spar;[∥]. "\x08adesuit;\x03♠\x05ades;\x03♠\x03ar;\x03∥", // sqsubseteq;[⊑] sqsupseteq;[⊒] sqsubset;[⊏] sqsupset;[⊐] sqcaps;[⊓︀] sqcups;[⊔︀] sqsube;[⊑] sqsupe;[⊒] square;[□] squarf;[▪] sqcap;[⊓] sqcup;[⊔] sqsub;[⊏] sqsup;[⊐] squf;[▪] squ;[□]. 
"\x09subseteq;\x03⊑\x09supseteq;\x03⊒\x07subset;\x03⊏\x07supset;\x03⊐\x05caps;\x06⊓︀\x05cups;\x06⊔︀\x05sube;\x03⊑\x05supe;\x03⊒\x05uare;\x03□\x05uarf;\x03▪\x04cap;\x03⊓\x04cup;\x03⊔\x04sub;\x03⊏\x04sup;\x03⊐\x03uf;\x03▪\x02u;\x03□", // srarr;[→]. "\x04arr;\x03→", // ssetmn;[∖] ssmile;[⌣] sstarf;[⋆] sscr;[𝓈]. "\x05etmn;\x03∖\x05mile;\x03⌣\x05tarf;\x03⋆\x03cr;\x04𝓈", // straightepsilon;[ϵ] straightphi;[ϕ] starf;[★] strns;[¯] star;[☆]. "\x0eraightepsilon;\x02ϵ\x0araightphi;\x02ϕ\x04arf;\x03★\x04rns;\x02¯\x03ar;\x03☆", // succcurlyeq;[≽] succnapprox;[⪺] subsetneqq;[⫋] succapprox;[⪸] supsetneqq;[⫌] subseteqq;[⫅] subsetneq;[⊊] supseteqq;[⫆] supsetneq;[⊋] subseteq;[⊆] succneqq;[⪶] succnsim;[⋩] supseteq;[⊇] subedot;[⫃] submult;[⫁] subplus;[⪿] subrarr;[⥹] succsim;[≿] supdsub;[⫘] supedot;[⫄] suphsol;[⟉] suphsub;[⫗] suplarr;[⥻] supmult;[⫂] supplus;[⫀] subdot;[⪽] subset;[⊂] subsim;[⫇] subsub;[⫕] subsup;[⫓] succeq;[⪰] supdot;[⪾] supset;[⊃] supsim;[⫈] supsub;[⫔] supsup;[⫖] subnE;[⫋] subne;[⊊] supnE;[⫌] supne;[⊋] subE;[⫅] sube;[⊆] succ;[≻] sung;[♪] sup1;[¹] sup2;[²] sup3;[³] supE;[⫆] supe;[⊇] sub;[⊂] sum;[∑] sup1[¹] sup2[²] sup3[³] sup;[⊃]. "\x0acccurlyeq;\x03≽\x0accnapprox;\x03⪺\x09bsetneqq;\x03⫋\x09ccapprox;\x03⪸\x09psetneqq;\x03⫌\x08bseteqq;\x03⫅\x08bsetneq;\x03⊊\x08pseteqq;\x03⫆\x08psetneq;\x03⊋\x07bseteq;\x03⊆\x07ccneqq;\x03⪶\x07ccnsim;\x03⋩\x07pseteq;\x03⊇\x06bedot;\x03⫃\x06bmult;\x03⫁\x06bplus;\x03⪿\x06brarr;\x03⥹\x06ccsim;\x03≿\x06pdsub;\x03⫘\x06pedot;\x03⫄\x06phsol;\x03⟉\x06phsub;\x03⫗\x06plarr;\x03⥻\x06pmult;\x03⫂\x06pplus;\x03⫀\x05bdot;\x03⪽\x05bset;\x03⊂\x05bsim;\x03⫇\x05bsub;\x03⫕\x05bsup;\x03⫓\x05cceq;\x03⪰\x05pdot;\x03⪾\x05pset;\x03⊃\x05psim;\x03⫈\x05psub;\x03⫔\x05psup;\x03⫖\x04bnE;\x03⫋\x04bne;\x03⊊\x04pnE;\x03⫌\x04pne;\x03⊋\x03bE;\x03⫅\x03be;\x03⊆\x03cc;\x03≻\x03ng;\x03♪\x03p1;\x02¹\x03p2;\x02²\x03p3;\x02³\x03pE;\x03⫆\x03pe;\x03⊇\x02b;\x03⊂\x02m;\x03∑\x02p1\x02¹\x02p2\x02²\x02p3\x02³\x02p;\x03⊃", // swarrow;[↙] swarhk;[⤦] swnwar;[⤪] swArr;[⇙] swarr;[↙]. 
"\x06arrow;\x03↙\x05arhk;\x03⤦\x05nwar;\x03⤪\x04Arr;\x03⇙\x04arr;\x03↙", // szlig;[ß] szlig[ß]. "\x04lig;\x02ß\x03lig\x02ß", // target;[⌖] tau;[τ]. "\x05rget;\x03⌖\x02u;\x02τ", // tbrk;[⎴]. "\x03rk;\x03⎴", // tcaron;[ť] tcedil;[ţ] tcy;[т]. "\x05aron;\x02ť\x05edil;\x02ţ\x02y;\x02т", // tdot;[⃛]. "\x03ot;\x03⃛", // telrec;[⌕]. "\x05lrec;\x03⌕", // tfr;[𝔱]. "\x02r;\x04𝔱", // thickapprox;[≈] therefore;[∴] thetasym;[ϑ] thicksim;[∼] there4;[∴] thetav;[ϑ] thinsp;[ ] thksim;[∼] theta;[θ] thkap;[≈] thorn;[þ] thorn[þ]. "\x0aickapprox;\x03≈\x08erefore;\x03∴\x07etasym;\x02ϑ\x07icksim;\x03∼\x05ere4;\x03∴\x05etav;\x02ϑ\x05insp;\x03 \x05ksim;\x03∼\x04eta;\x02θ\x04kap;\x03≈\x04orn;\x02þ\x03orn\x02þ", // timesbar;[⨱] timesb;[⊠] timesd;[⨰] tilde;[˜] times;[×] times[×] tint;[∭]. "\x07mesbar;\x03⨱\x05mesb;\x03⊠\x05mesd;\x03⨰\x04lde;\x02˜\x04mes;\x02×\x03mes\x02×\x03nt;\x03∭", // topfork;[⫚] topbot;[⌶] topcir;[⫱] toea;[⤨] topf;[𝕥] tosa;[⤩] top;[⊤]. "\x06pfork;\x03⫚\x05pbot;\x03⌶\x05pcir;\x03⫱\x03ea;\x03⤨\x03pf;\x04𝕥\x03sa;\x03⤩\x02p;\x03⊤", // tprime;[‴]. "\x05rime;\x03‴", // trianglerighteq;[⊵] trianglelefteq;[⊴] triangleright;[▹] triangledown;[▿] triangleleft;[◃] triangleq;[≜] triangle;[▵] triminus;[⨺] trpezium;[⏢] triplus;[⨹] tritime;[⨻] tridot;[◬] trade;[™] trisb;[⧍] trie;[≜]. "\x0eianglerighteq;\x03⊵\x0dianglelefteq;\x03⊴\x0ciangleright;\x03▹\x0biangledown;\x03▿\x0biangleleft;\x03◃\x08iangleq;\x03≜\x07iangle;\x03▵\x07iminus;\x03⨺\x07pezium;\x03⏢\x06iplus;\x03⨹\x06itime;\x03⨻\x05idot;\x03◬\x04ade;\x03™\x04isb;\x03⧍\x03ie;\x03≜", // tstrok;[ŧ] tshcy;[ћ] tscr;[𝓉] tscy;[ц]. "\x05trok;\x02ŧ\x04hcy;\x02ћ\x03cr;\x04𝓉\x03cy;\x02ц", // twoheadrightarrow;[↠] twoheadleftarrow;[↞] twixt;[≬]. "\x10oheadrightarrow;\x03↠\x0foheadleftarrow;\x03↞\x04ixt;\x03≬", // uArr;[⇑]. "\x03rr;\x03⇑", // uHar;[⥣]. "\x03ar;\x03⥣", // uacute;[ú] uacute[ú] uarr;[↑]. "\x05cute;\x02ú\x04cute\x02ú\x03rr;\x03↑", // ubreve;[ŭ] ubrcy;[ў]. "\x05reve;\x02ŭ\x04rcy;\x02ў", // ucirc;[û] ucirc[û] ucy;[у]. 
"\x04irc;\x02û\x03irc\x02û\x02y;\x02у", // udblac;[ű] udarr;[⇅] udhar;[⥮]. "\x05blac;\x02ű\x04arr;\x03⇅\x04har;\x03⥮", // ufisht;[⥾] ufr;[𝔲]. "\x05isht;\x03⥾\x02r;\x04𝔲", // ugrave;[ù] ugrave[ù]. "\x05rave;\x02ù\x04rave\x02ù", // uharl;[↿] uharr;[↾] uhblk;[▀]. "\x04arl;\x03↿\x04arr;\x03↾\x04blk;\x03▀", // ulcorner;[⌜] ulcorn;[⌜] ulcrop;[⌏] ultri;[◸]. "\x07corner;\x03⌜\x05corn;\x03⌜\x05crop;\x03⌏\x04tri;\x03◸", // umacr;[ū] uml;[¨] uml[¨]. "\x04acr;\x02ū\x02l;\x02¨\x01l\x02¨", // uogon;[ų] uopf;[𝕦]. "\x04gon;\x02ų\x03pf;\x04𝕦", // upharpoonright;[↾] upharpoonleft;[↿] updownarrow;[↕] upuparrows;[⇈] uparrow;[↑] upsilon;[υ] uplus;[⊎] upsih;[ϒ] upsi;[υ]. "\x0dharpoonright;\x03↾\x0charpoonleft;\x03↿\x0adownarrow;\x03↕\x09uparrows;\x03⇈\x06arrow;\x03↑\x06silon;\x02υ\x04lus;\x03⊎\x04sih;\x02ϒ\x03si;\x02υ", // urcorner;[⌝] urcorn;[⌝] urcrop;[⌎] uring;[ů] urtri;[◹]. "\x07corner;\x03⌝\x05corn;\x03⌝\x05crop;\x03⌎\x04ing;\x02ů\x04tri;\x03◹", // uscr;[𝓊]. "\x03cr;\x04𝓊", // utilde;[ũ] utdot;[⋰] utrif;[▴] utri;[▵]. "\x05ilde;\x02ũ\x04dot;\x03⋰\x04rif;\x03▴\x03ri;\x03▵", // uuarr;[⇈] uuml;[ü] uuml[ü]. "\x04arr;\x03⇈\x03ml;\x02ü\x02ml\x02ü", // uwangle;[⦧]. "\x06angle;\x03⦧", // vArr;[⇕]. "\x03rr;\x03⇕", // vBarv;[⫩] vBar;[⫨]. "\x04arv;\x03⫩\x03ar;\x03⫨", // vDash;[⊨]. "\x04ash;\x03⊨", // vartriangleright;[⊳] vartriangleleft;[⊲] varsubsetneqq;[⫋︀] varsupsetneqq;[⫌︀] varsubsetneq;[⊊︀] varsupsetneq;[⊋︀] varepsilon;[ϵ] varnothing;[∅] varpropto;[∝] varkappa;[ϰ] varsigma;[ς] vartheta;[ϑ] vangrt;[⦜] varphi;[ϕ] varrho;[ϱ] varpi;[ϖ] varr;[↕]. "\x0frtriangleright;\x03⊳\x0ertriangleleft;\x03⊲\x0crsubsetneqq;\x06⫋︀\x0crsupsetneqq;\x06⫌︀\x0brsubsetneq;\x06⊊︀\x0brsupsetneq;\x06⊋︀\x09repsilon;\x02ϵ\x09rnothing;\x03∅\x08rpropto;\x03∝\x07rkappa;\x02ϰ\x07rsigma;\x02ς\x07rtheta;\x02ϑ\x05ngrt;\x03⦜\x05rphi;\x02ϕ\x05rrho;\x02ϱ\x04rpi;\x02ϖ\x03rr;\x03↕", // vcy;[в]. "\x02y;\x02в", // vdash;[⊢]. "\x04ash;\x03⊢", // veebar;[⊻] vellip;[⋮] verbar;[|] veeeq;[≚] vert;[|] vee;[∨]. 
"\x05ebar;\x03⊻\x05llip;\x03⋮\x05rbar;\x01|\x04eeq;\x03≚\x03rt;\x01|\x02e;\x03∨", // vfr;[𝔳]. "\x02r;\x04𝔳", // vltri;[⊲]. "\x04tri;\x03⊲", // vnsub;[⊂⃒] vnsup;[⊃⃒]. "\x04sub;\x06⊂⃒\x04sup;\x06⊃⃒", // vopf;[𝕧]. "\x03pf;\x04𝕧", // vprop;[∝]. "\x04rop;\x03∝", // vrtri;[⊳]. "\x04tri;\x03⊳", // vsubnE;[⫋︀] vsubne;[⊊︀] vsupnE;[⫌︀] vsupne;[⊋︀] vscr;[𝓋]. "\x05ubnE;\x06⫋︀\x05ubne;\x06⊊︀\x05upnE;\x06⫌︀\x05upne;\x06⊋︀\x03cr;\x04𝓋", // vzigzag;[⦚]. "\x06igzag;\x03⦚", // wcirc;[ŵ]. "\x04irc;\x02ŵ", // wedbar;[⩟] wedgeq;[≙] weierp;[℘] wedge;[∧]. "\x05dbar;\x03⩟\x05dgeq;\x03≙\x05ierp;\x03℘\x04dge;\x03∧", // wfr;[𝔴]. "\x02r;\x04𝔴", // wopf;[𝕨]. "\x03pf;\x04𝕨", // wp;[℘]. "\x01;\x03℘", // wreath;[≀] wr;[≀]. "\x05eath;\x03≀\x01;\x03≀", // wscr;[𝓌]. "\x03cr;\x04𝓌", // xcirc;[◯] xcap;[⋂] xcup;[⋃]. "\x04irc;\x03◯\x03ap;\x03⋂\x03up;\x03⋃", // xdtri;[▽]. "\x04tri;\x03▽", // xfr;[𝔵]. "\x02r;\x04𝔵", // xhArr;[⟺] xharr;[⟷]. "\x04Arr;\x03⟺\x04arr;\x03⟷", // xi;[ξ]. "\x01;\x02ξ", // xlArr;[⟸] xlarr;[⟵]. "\x04Arr;\x03⟸\x04arr;\x03⟵", // xmap;[⟼]. "\x03ap;\x03⟼", // xnis;[⋻]. "\x03is;\x03⋻", // xoplus;[⨁] xotime;[⨂] xodot;[⨀] xopf;[𝕩]. "\x05plus;\x03⨁\x05time;\x03⨂\x04dot;\x03⨀\x03pf;\x04𝕩", // xrArr;[⟹] xrarr;[⟶]. "\x04Arr;\x03⟹\x04arr;\x03⟶", // xsqcup;[⨆] xscr;[𝓍]. "\x05qcup;\x03⨆\x03cr;\x04𝓍", // xuplus;[⨄] xutri;[△]. "\x05plus;\x03⨄\x04tri;\x03△", // xvee;[⋁]. "\x03ee;\x03⋁", // xwedge;[⋀]. "\x05edge;\x03⋀", // yacute;[ý] yacute[ý] yacy;[я]. "\x05cute;\x02ý\x04cute\x02ý\x03cy;\x02я", // ycirc;[ŷ] ycy;[ы]. "\x04irc;\x02ŷ\x02y;\x02ы", // yen;[¥] yen[¥]. "\x02n;\x02¥\x01n\x02¥", // yfr;[𝔶]. "\x02r;\x04𝔶", // yicy;[ї]. "\x03cy;\x02ї", // yopf;[𝕪]. "\x03pf;\x04𝕪", // yscr;[𝓎]. "\x03cr;\x04𝓎", // yucy;[ю] yuml;[ÿ] yuml[ÿ]. "\x03cy;\x02ю\x03ml;\x02ÿ\x02ml\x02ÿ", // zacute;[ź]. "\x05cute;\x02ź", // zcaron;[ž] zcy;[з]. "\x05aron;\x02ž\x02y;\x02з", // zdot;[ż]. "\x03ot;\x02ż", // zeetrf;[ℨ] zeta;[ζ]. "\x05etrf;\x03ℨ\x03ta;\x02ζ", // zfr;[𝔷]. "\x02r;\x04𝔷", // zhcy;[ж]. 
"\x03cy;\x02ж", // zigrarr;[⇝]. "\x06grarr;\x03⇝", // zopf;[𝕫]. "\x03pf;\x04𝕫", // zscr;[𝓏]. "\x03cr;\x04𝓏", // zwnj;[‌] zwj;[‍]. "\x03nj;\x03‌\x02j;\x03‍", ), "small_words" => "GT\x00LT\x00gt\x00lt\x00", "small_mappings" => array( ">", "<", ">", "<", ) ) ); class-wp-html-processor-state.php000064400000026121147333266670013126 0ustar00 */ public $stack_of_template_insertion_modes = array(); /** * Tracks open elements while scanning HTML. * * This property is initialized in the constructor and never null. * * @since 6.4.0 * * @see https://html.spec.whatwg.org/#stack-of-open-elements * * @var WP_HTML_Open_Elements */ public $stack_of_open_elements; /** * Tracks open formatting elements, used to handle mis-nested formatting element tags. * * This property is initialized in the constructor and never null. * * @since 6.4.0 * * @see https://html.spec.whatwg.org/#list-of-active-formatting-elements * * @var WP_HTML_Active_Formatting_Elements */ public $active_formatting_elements; /** * Refers to the currently-matched tag, if any. * * @since 6.4.0 * * @var WP_HTML_Token|null */ public $current_token = null; /** * Tree construction insertion mode. * * @since 6.4.0 * * @see https://html.spec.whatwg.org/#insertion-mode * * @var string */ public $insertion_mode = self::INSERTION_MODE_INITIAL; /** * Context node initializing fragment parser, if created as a fragment parser. * * @since 6.4.0 * * @see https://html.spec.whatwg.org/#concept-frag-parse-context * * @var [string, array]|null */ public $context_node = null; /** * The recognized encoding of the input byte stream. * * > The stream of code points that comprises the input to the tokenization * > stage will be initially seen by the user agent as a stream of bytes * > (typically coming over the network or from the local file system). * > The bytes encode the actual characters according to a particular character * > encoding, which the user agent uses to decode the bytes into characters. 
* * @since 6.7.0 * * @var string|null */ public $encoding = null; /** * The parser's confidence in the input encoding. * * > When the HTML parser is decoding an input byte stream, it uses a character * > encoding and a confidence. The confidence is either tentative, certain, or * > irrelevant. The encoding used, and whether the confidence in that encoding * > is tentative or certain, is used during the parsing to determine whether to * > change the encoding. If no encoding is necessary, e.g. because the parser is * > operating on a Unicode stream and doesn't have to use a character encoding * > at all, then the confidence is irrelevant. * * @since 6.7.0 * * @var string */ public $encoding_confidence = 'tentative'; /** * HEAD element pointer. * * @since 6.7.0 * * @see https://html.spec.whatwg.org/multipage/parsing.html#head-element-pointer * * @var WP_HTML_Token|null */ public $head_element = null; /** * FORM element pointer. * * > points to the last form element that was opened and whose end tag has * > not yet been seen. It is used to make form controls associate with * > forms in the face of dramatically bad markup, for historical reasons. * > It is ignored inside template elements. * * @todo This may be invalidated by a seek operation. * * @see https://html.spec.whatwg.org/#form-element-pointer * * @since 6.7.0 * * @var WP_HTML_Token|null */ public $form_element = null; /** * The frameset-ok flag indicates if a `FRAMESET` element is allowed in the current state. * * > The frameset-ok flag is set to "ok" when the parser is created. It is set to "not ok" after certain tokens are seen. * * @since 6.4.0 * * @see https://html.spec.whatwg.org/#frameset-ok-flag * * @var bool */ public $frameset_ok = true; /** * Constructor - creates a new and empty state value. 
* * @since 6.4.0 * * @see WP_HTML_Processor */ public function __construct() { $this->stack_of_open_elements = new WP_HTML_Open_Elements(); $this->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); } } class-wp-html-open-elements.php000064400000054174147333266670012555 0ustar00 Initially, the stack of open elements is empty. The stack grows * > downwards; the topmost node on the stack is the first one added * > to the stack, and the bottommost node of the stack is the most * > recently added node in the stack (notwithstanding when the stack * > is manipulated in a random access fashion as part of the handling * > for misnested tags). * * @since 6.4.0 * * @access private * * @see https://html.spec.whatwg.org/#stack-of-open-elements * @see WP_HTML_Processor */ class WP_HTML_Open_Elements { /** * Holds the stack of open element references. * * @since 6.4.0 * * @var WP_HTML_Token[] */ public $stack = array(); /** * Whether a P element is in button scope currently. * * This class optimizes scope lookup by pre-calculating * this value when elements are added and removed to the * stack of open elements which might change its value. * This avoids frequent iteration over the stack. * * @since 6.4.0 * * @var bool */ private $has_p_in_button_scope = false; /** * A function that will be called when an item is popped off the stack of open elements. * * The function will be called with the popped item as its argument. * * @since 6.6.0 * * @var Closure|null */ private $pop_handler = null; /** * A function that will be called when an item is pushed onto the stack of open elements. * * The function will be called with the pushed item as its argument. * * @since 6.6.0 * * @var Closure|null */ private $push_handler = null; /** * Sets a pop handler that will be called when an item is popped off the stack of * open elements. * * The function will be called with the popped item as its argument. * * @since 6.6.0 * * @param Closure $handler The handler function.
*/
    public function set_pop_handler( Closure $handler ): void {
        $this->pop_handler = $handler;
    }

    /**
     * Registers the callback invoked whenever an item is pushed onto the
     * stack of open elements.
     *
     * The callback receives the pushed item as its only argument. Registering
     * a handler replaces any handler that was stored before.
     *
     * @since 6.6.0
     *
     * @param Closure $handler The handler function.
     */
    public function set_push_handler( Closure $handler ): void {
        $this->push_handler = $handler;
    }

    /**
     * Returns the node at the nth position on the stack of open elements,
     * or `null` if no such position exists.
     *
     * Positions are 1-based and counted from the top of the stack, i.e. from
     * the element that was added first: 1 is the top-most element, 2 is the
     * one added after it, and so on. Any position smaller than 1 or larger
     * than the stack yields `null`.
     *
     * @since 6.7.0
     *
     * @param int $nth Retrieve the nth item on the stack, with 1 being
     *                 the top element, 2 being the second, etc...
     * @return WP_HTML_Token|null Node on the stack at the given location,
     *                            or `null` if the location isn't on the stack.
     */
    public function at( int $nth ): ?WP_HTML_Token {
        $remaining = $nth;

        foreach ( $this->walk_down() as $node ) {
            --$remaining;

            if ( 0 === $remaining ) {
                return $node;
            }
        }

        return null;
    }

    /**
     * Reports whether any node on the stack of open elements has the given name.
     *
     * Only the node name is compared; the namespace of the node is not considered.
     *
     * @since 6.7.0
     *
     * @param string $node_name Name of node for which to check.
     * @return bool Whether a node of the given name is in the stack of open elements.
     */
    public function contains( string $node_name ): bool {
        foreach ( $this->walk_up() as $open_node ) {
            if ( $open_node->node_name === $node_name ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Reports if a specific node is in the stack of open elements.
     *
     * @since 6.4.0
     *
     * @param WP_HTML_Token $token Look for this node in the stack.
     * @return bool Whether the referenced node is in the stack of open elements.
*/
    public function contains_node( WP_HTML_Token $token ): bool {
        // Strict comparison of objects matches on identity, not on equal contents.
        return in_array( $token, $this->stack, true );
    }

    /**
     * Reports the number of nodes currently held in the stack of open elements.
     *
     * @since 6.4.0
     *
     * @return int How many nodes are in the stack of open elements.
     */
    public function count(): int {
        $node_count = count( $this->stack );

        return $node_count;
    }

    /**
     * Returns the most-recently added node, i.e. the one at the end of the
     * stack of open elements. Returns `null` when the stack is empty.
     *
     * @since 6.4.0
     *
     * @return WP_HTML_Token|null Last node in the stack of open elements, if one exists, otherwise null.
     */
    public function current_node(): ?WP_HTML_Token {
        // `end()` reports `false` for an empty array.
        $last_node = end( $this->stack );

        if ( ! $last_node ) {
            return null;
        }

        return $last_node;
    }

    /**
     * Indicates if the current node is of a given type or name.
     *
     * It's possible to pass either a node type or a node name to this function.
     * In the case there is no current element it will always return `false`.
     *
     * Example:
     *
     *     // Is the current node a text node?
     *     $stack->current_node_is( '#text' );
     *
     *     // Is the current node a DIV element?
     *     $stack->current_node_is( 'DIV' );
     *
     *     // Is the current node any element/tag?
     *     $stack->current_node_is( '#tag' );
     *
     * @see WP_HTML_Tag_Processor::get_token_type
     * @see WP_HTML_Tag_Processor::get_token_name
     *
     * @since 6.7.0
     *
     * @access private
     *
     * @param string $identity Check if the current node has this name or type (depending on what is provided).
     * @return bool Whether there is a current element that matches the given identity, whether a token name or type.
*/
    public function current_node_is( string $identity ): bool {
        $node = end( $this->stack );
        if ( false === $node ) {
            // Empty stack: there is no current node to compare against.
            return false;
        }

        $name = $node->node_name;

        // Exact match on the node name (also covers names such as "#text").
        if ( $name === $identity ) {
            return true;
        }

        // A DOCTYPE node is stored under the name "html".
        if ( '#doctype' === $identity ) {
            return 'html' === $name;
        }

        // Element names are stored entirely in upper case.
        if ( '#tag' === $identity ) {
            return ctype_upper( $name );
        }

        return false;
    }

    /**
     * Returns whether an element is in a specific scope.
     *
     * Walks from the most-recently added node towards the first one. The
     * search succeeds as soon as a node matching `$tag_name` is met and fails
     * as soon as a node listed in `$termination_list` is met first. Nodes
     * outside the HTML namespace are compared by "{namespace} {node name}",
     * e.g. "svg TITLE".
     *
     * The special value "(internal: H1 through H6 - do not use)" matches any
     * of the H1 - H6 elements.
     *
     * @since 6.4.0
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-the-specific-scope
     *
     * @param string   $tag_name         Name of tag to check.
     * @param string[] $termination_list List of elements that terminate the search.
     * @return bool Whether the element was found in a specific scope.
     */
    public function has_element_in_specific_scope( string $tag_name, $termination_list ): bool {
        $seeks_any_heading = '(internal: H1 through H6 - do not use)' === $tag_name;
        $heading_names     = array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' );

        foreach ( $this->walk_up() as $open_node ) {
            if ( 'html' === $open_node->namespace ) {
                $qualified_name = $open_node->node_name;
            } else {
                $qualified_name = "{$open_node->namespace} {$open_node->node_name}";
            }

            $is_match = $qualified_name === $tag_name
                || ( $seeks_any_heading && in_array( $qualified_name, $heading_names, true ) );

            if ( $is_match ) {
                return true;
            }

            // Reaching a scope boundary before a match ends the search.
            if ( in_array( $qualified_name, $termination_list, true ) ) {
                return false;
            }
        }

        return false;
    }

    /**
     * Returns whether a particular element is in scope.
     *
     * > The stack of open elements is said to have a particular element in
     * > scope when it has that element in the specific scope consisting of
     * > the following element types:
     * >
     * >   - applet
     * >   - caption
     * >   - html
     * >   - table
     * >   - td
     * >   - th
     * >   - marquee
     * >   - object
     * >   - template
     * >   - MathML mi
     * >   - MathML mo
     * >   - MathML mn
     * >   - MathML ms
     * >   - MathML mtext
     * >   - MathML annotation-xml
     * >   - SVG foreignObject
     * >   - SVG desc
     * >   - SVG title
     *
     * @since 6.4.0
     * @since 6.7.0 Full support.
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-scope
     *
     * @param string $tag_name Name of tag to check.
     * @return bool Whether given element is in scope.
*/
    public function has_element_in_scope( string $tag_name ): bool {
        return $this->has_element_in_specific_scope(
            $tag_name,
            array(
                'APPLET',
                'CAPTION',
                'HTML',
                'TABLE',
                'TD',
                'TH',
                'MARQUEE',
                'OBJECT',
                'TEMPLATE',

                'math MI',
                'math MO',
                'math MN',
                'math MS',
                'math MTEXT',
                'math ANNOTATION-XML',

                'svg FOREIGNOBJECT',
                'svg DESC',
                'svg TITLE',
            )
        );
    }

    /**
     * Returns whether a particular element is in list item scope.
     *
     * > The stack of open elements is said to have a particular element
     * > in list item scope when it has that element in the specific scope
     * > consisting of the following element types:
     * >
     * >   - All the element types listed above for the has an element in scope algorithm.
     * >   - ol in the HTML namespace
     * >   - ul in the HTML namespace
     *
     * Note that BUTTON is a boundary for button scope only. It does not
     * terminate list item scope, so an LI above an open BUTTON is still
     * reported as being in list item scope.
     *
     * @since 6.4.0
     * @since 6.5.0 Implemented: no longer throws on every invocation.
     * @since 6.7.0 Supports all required HTML elements.
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
     *
     * @param string $tag_name Name of tag to check.
     * @return bool Whether given element is in scope.
     */
    public function has_element_in_list_item_scope( string $tag_name ): bool {
        return $this->has_element_in_specific_scope(
            $tag_name,
            array(
                // Boundaries shared with the plain "in scope" check.
                'APPLET',
                'CAPTION',
                'HTML',
                'TABLE',
                'TD',
                'TH',
                'MARQUEE',
                'OBJECT',
                'TEMPLATE',

                // Boundaries specific to list item scope.
                'OL',
                'UL',

                'math MI',
                'math MO',
                'math MN',
                'math MS',
                'math MTEXT',
                'math ANNOTATION-XML',

                'svg FOREIGNOBJECT',
                'svg DESC',
                'svg TITLE',
            )
        );
    }

    /**
     * Returns whether a particular element is in button scope.
     *
     * > The stack of open elements is said to have a particular element
     * > in button scope when it has that element in the specific scope
     * > consisting of the following element types:
     * >
     * >   - All the element types listed above for the has an element in scope algorithm.
     * >   - button in the HTML namespace
     *
     * @since 6.4.0
     * @since 6.7.0 Supports all required HTML elements.
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope
     *
     * @param string $tag_name Name of tag to check.
* @return bool Whether given element is in scope.
     */
    public function has_element_in_button_scope( string $tag_name ): bool {
        // Elements which end the upward search when met before a match.
        $scope_boundaries = array(
            'APPLET',
            'BUTTON',
            'CAPTION',
            'HTML',
            'TABLE',
            'TD',
            'TH',
            'MARQUEE',
            'OBJECT',
            'TEMPLATE',
            'math MI',
            'math MO',
            'math MN',
            'math MS',
            'math MTEXT',
            'math ANNOTATION-XML',
            'svg FOREIGNOBJECT',
            'svg DESC',
            'svg TITLE',
        );

        return $this->has_element_in_specific_scope( $tag_name, $scope_boundaries );
    }

    /**
     * Returns whether a particular element is in table scope.
     *
     * > The stack of open elements is said to have a particular element
     * > in table scope when it has that element in the specific scope
     * > consisting of the following element types:
     * >
     * >   - html in the HTML namespace
     * >   - table in the HTML namespace
     * >   - template in the HTML namespace
     *
     * @since 6.4.0
     * @since 6.7.0 Full implementation.
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope
     *
     * @param string $tag_name Name of tag to check.
     * @return bool Whether given element is in scope.
     */
    public function has_element_in_table_scope( string $tag_name ): bool {
        $scope_boundaries = array( 'HTML', 'TABLE', 'TEMPLATE' );

        return $this->has_element_in_specific_scope( $tag_name, $scope_boundaries );
    }

    /**
     * Returns whether a particular element is in select scope.
     *
     * This test differs from the others like it, in that its rules are inverted.
     * Instead of arriving at a match when one of any tag in a termination group
     * is reached, this one terminates if any other tag is reached.
     *
     * > The stack of open elements is said to have a particular element in select scope when it has
     * > that element in the specific scope consisting of all element types except the following:
     * >   - optgroup in the HTML namespace
     * >   - option in the HTML namespace
     *
     * @since 6.4.0 Stub implementation (throws).
     * @since 6.7.0 Full implementation.
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-select-scope
     *
     * @param string $tag_name Name of tag to check.
     * @return bool Whether the given element is in SELECT scope.
*/
    public function has_element_in_select_scope( string $tag_name ): bool {
        foreach ( $this->walk_up() as $open_node ) {
            $name = $open_node->node_name;

            if ( $name === $tag_name ) {
                return true;
            }

            // Only OPTION and OPTGROUP may sit between the current node and the match.
            if ( ! in_array( $name, array( 'OPTION', 'OPTGROUP' ), true ) ) {
                return false;
            }
        }

        return false;
    }

    /**
     * Reports the pre-calculated answer to whether a P element is in BUTTON scope.
     *
     * The value is a cached flag kept up to date as elements are pushed and
     * popped, so no iteration over the stack happens here.
     *
     * @since 6.4.0
     *
     * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope
     *
     * @return bool Whether a P is in BUTTON scope.
     */
    public function has_p_in_button_scope(): bool {
        return $this->has_p_in_button_scope;
    }

    /**
     * Pops the most-recently added node off of the stack of open elements.
     *
     * Nothing is removed when the stack is empty or when the last node is the
     * one bookmarked as "context-node"; that node always stays on the stack.
     *
     * @since 6.4.0
     *
     * @see https://html.spec.whatwg.org/#stack-of-open-elements
     *
     * @return bool Whether a node was popped off of the stack.
     */
    public function pop(): bool {
        $last_node = end( $this->stack );

        if ( false === $last_node ) {
            return false;
        }

        if ( 'context-node' === $last_node->bookmark_name ) {
            return false;
        }

        array_pop( $this->stack );
        $this->after_element_pop( $last_node );

        return true;
    }

    /**
     * Pops nodes off of the stack of open elements until an HTML tag with the given name has been popped.
     *
     * Nodes outside of the HTML namespace are popped along the way but never
     * count as the sought-after tag. The special value
     * "(internal: H1 through H6 - do not use)" stops at any of H1 - H6.
     *
     * @since 6.4.0
     *
     * @see WP_HTML_Open_Elements::pop
     *
     * @param string $html_tag_name Name of tag that needs to be popped off of the stack of open elements.
     * @return bool Whether a tag of the given name was found and popped off of the stack of open elements.
     */
    public function pop_until( string $html_tag_name ): bool {
        $wants_any_heading = '(internal: H1 through H6 - do not use)' === $html_tag_name;
        $heading_names     = array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' );

        foreach ( $this->walk_up() as $open_node ) {
            $this->pop();

            if ( 'html' !== $open_node->namespace ) {
                continue;
            }

            $popped_name = $open_node->node_name;

            if ( $popped_name === $html_tag_name ) {
                return true;
            }

            if ( $wants_any_heading && in_array( $popped_name, $heading_names, true ) ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Pushes a node onto the stack of open elements.
     *
     * @since 6.4.0
     *
     * @see https://html.spec.whatwg.org/#stack-of-open-elements
     *
     * @param WP_HTML_Token $stack_item Item to add onto stack.
*/
    public function push( WP_HTML_Token $stack_item ): void {
        array_push( $this->stack, $stack_item );
        $this->after_element_push( $stack_item );
    }

    /**
     * Removes a specific node from the stack of open elements.
     *
     * The node is located by its bookmark name, searching from the
     * most-recently added node towards the first. The node bookmarked
     * as "context-node" is never removed.
     *
     * @since 6.4.0
     *
     * @param WP_HTML_Token $token The node to remove from the stack of open elements.
     * @return bool Whether the node was found and removed from the stack of open elements.
     */
    public function remove_node( WP_HTML_Token $token ): bool {
        if ( 'context-node' === $token->bookmark_name ) {
            return false;
        }

        for ( $index = count( $this->stack ) - 1; $index >= 0; $index-- ) {
            $candidate = $this->stack[ $index ];

            if ( $candidate->bookmark_name === $token->bookmark_name ) {
                array_splice( $this->stack, $index, 1 );
                $this->after_element_pop( $candidate );
                return true;
            }
        }

        return false;
    }

    /**
     * Steps through the stack of open elements, starting with the top element
     * (added first) and walking downwards to the one added last.
     *
     * This generator function is designed to be used inside a "foreach" loop.
     * The number of steps is fixed when iteration starts.
     *
     * Example:
     *
     *     $html = '<em><strong><a>We are here';
     *     foreach ( $stack->walk_down() as $node ) {
     *         echo "{$node->node_name} -> ";
     *     }
     *     > EM -> STRONG -> A ->
     *
     * To start with the most-recently added element and walk towards the top,
     * see WP_HTML_Open_Elements::walk_up().
     *
     * @since 6.4.0
     */
    public function walk_down() {
        $total = count( $this->stack );
        $index = 0;

        while ( $index < $total ) {
            yield $this->stack[ $index ];
            ++$index;
        }
    }

    /**
     * Steps through the stack of open elements, starting with the bottom element
     * (added last) and walking upwards to the one added first.
     *
     * This generator function is designed to be used inside a "foreach" loop.
     *
     * Example:
     *
     *     $html = '<em><strong><a>We are here';
     *     foreach ( $stack->walk_up() as $node ) {
     *         echo "{$node->node_name} -> ";
     *     }
     *     > A -> STRONG -> EM ->
     *
     * To start with the first added element and walk towards the bottom,
     * see WP_HTML_Open_Elements::walk_down().
*
     * @since 6.4.0
     * @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
     *
     * @param WP_HTML_Token|null $above_this_node Optional. Start traversing above this node,
     *                                            if provided and if the node exists.
     */
    public function walk_up( ?WP_HTML_Token $above_this_node = null ) {
        // Without a starting node every element is yielded, beginning with the last one.
        $has_found_node = null === $above_this_node;

        for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
            $node = $this->stack[ $i ];

            if ( ! $has_found_node ) {
                // Skip nodes up to and including the starting node itself.
                $has_found_node = $node === $above_this_node;
                continue;
            }

            yield $node;
        }
    }

    /*
     * Internal helpers.
     */

    /**
     * Updates internal flags after adding an element.
     *
     * Certain conditions (such as "has_p_in_button_scope") are maintained here as
     * flags that are only modified when adding and removing elements. This allows
     * the HTML Processor to quickly check for these conditions instead of iterating
     * over the open stack elements upon each new tag it encounters. These flags,
     * however, need to be maintained as items are added and removed from the stack.
     *
     * The push handler, if one has been set, is called with the item afterwards.
     *
     * @since 6.4.0
     *
     * @param WP_HTML_Token $item Element that was added to the stack of open elements.
     */
    public function after_element_push( WP_HTML_Token $item ): void {
        $namespaced_name = 'html' === $item->namespace
            ? $item->node_name
            : "{$item->namespace} {$item->node_name}";

        /*
         * When adding support for new elements, expand this switch to trap
         * cases where the precalculated value needs to change.
         */
        switch ( $namespaced_name ) {
            // A new button-scope boundary hides any P element above it.
            case 'APPLET':
            case 'BUTTON':
            case 'CAPTION':
            case 'HTML':
            case 'TABLE':
            case 'TD':
            case 'TH':
            case 'MARQUEE':
            case 'OBJECT':
            case 'TEMPLATE':
            case 'math MI':
            case 'math MO':
            case 'math MN':
            case 'math MS':
            case 'math MTEXT':
            case 'math ANNOTATION-XML':
            case 'svg FOREIGNOBJECT':
            case 'svg DESC':
            case 'svg TITLE':
                $this->has_p_in_button_scope = false;
                break;

            case 'P':
                $this->has_p_in_button_scope = true;
                break;
        }

        if ( null !== $this->push_handler ) {
            ( $this->push_handler )( $item );
        }
    }

    /**
     * Updates internal flags after removing an element.
     *
     * Certain conditions (such as "has_p_in_button_scope") are maintained here as
     * flags that are only modified when adding and removing elements. This allows
     * the HTML Processor to quickly check for these conditions instead of iterating
     * over the open stack elements upon each new tag it encounters. These flags,
     * however, need to be maintained as items are added and removed from the stack.
     *
     * The removed element is identified by its namespaced name, exactly as in
     * {@see WP_HTML_Open_Elements::after_element_push}, so that removing a MathML
     * or SVG scope boundary (e.g. "math MI") recalculates the flag as well.
     *
     * The pop handler, if one has been set, is called with the item afterwards.
     *
     * @since 6.4.0
     *
     * @param WP_HTML_Token $item Element that was removed from the stack of open elements.
     */
    public function after_element_pop( WP_HTML_Token $item ): void {
        $namespaced_name = 'html' === $item->namespace
            ? $item->node_name
            : "{$item->namespace} {$item->node_name}";

        /*
         * When adding support for new elements, expand this switch to trap
         * cases where the precalculated value needs to change.
         */
        switch ( $namespaced_name ) {
            // Removing a P or a button-scope boundary may change the answer: recalculate.
            case 'APPLET':
            case 'BUTTON':
            case 'CAPTION':
            case 'HTML':
            case 'P':
            case 'TABLE':
            case 'TD':
            case 'TH':
            case 'MARQUEE':
            case 'OBJECT':
            case 'TEMPLATE':
            case 'math MI':
            case 'math MO':
            case 'math MN':
            case 'math MS':
            case 'math MTEXT':
            case 'math ANNOTATION-XML':
            case 'svg FOREIGNOBJECT':
            case 'svg DESC':
            case 'svg TITLE':
                $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );
                break;
        }

        if ( null !== $this->pop_handler ) {
            ( $this->pop_handler )( $item );
        }
    }

    /**
     * Clear the stack back to a table context.
*
     * > When the steps above require the UA to clear the stack back to a table context, it means
     * > that the UA must, while the current node is not a table, template, or html element, pop
     * > elements from the stack of open elements.
     *
     * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context
     *
     * @since 6.7.0
     */
    public function clear_to_table_context(): void {
        $stop_at = array( 'TABLE', 'TEMPLATE', 'HTML' );

        foreach ( $this->walk_up() as $open_node ) {
            if ( in_array( $open_node->node_name, $stop_at, true ) ) {
                break;
            }

            $this->pop();
        }
    }

    /**
     * Clear the stack back to a table body context.
     *
     * > When the steps above require the UA to clear the stack back to a table body context, it
     * > means that the UA must, while the current node is not a tbody, tfoot, thead, template, or
     * > html element, pop elements from the stack of open elements.
     *
     * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context
     *
     * @since 6.7.0
     */
    public function clear_to_table_body_context(): void {
        $stop_at = array( 'TBODY', 'TFOOT', 'THEAD', 'TEMPLATE', 'HTML' );

        foreach ( $this->walk_up() as $open_node ) {
            if ( in_array( $open_node->node_name, $stop_at, true ) ) {
                break;
            }

            $this->pop();
        }
    }

    /**
     * Clear the stack back to a table row context.
     *
     * > When the steps above require the UA to clear the stack back to a table row context, it
     * > means that the UA must, while the current node is not a tr, template, or html element, pop
     * > elements from the stack of open elements.
     *
     * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context
     *
     * @since 6.7.0
     */
    public function clear_to_table_row_context(): void {
        $stop_at = array( 'TR', 'TEMPLATE', 'HTML' );

        foreach ( $this->walk_up() as $open_node ) {
            if ( in_array( $open_node->node_name, $stop_at, true ) ) {
                break;
            }

            $this->pop();
        }
    }

    /**
     * Wakeup magic method.
* * @since 6.6.0 * * @throws \LogicException Always; instances of this class must not be unserialized. */ public function __wakeup() { throw new \LogicException( __CLASS__ . ' should never be unserialized' ); } } class-wp-html-doctype-info.php000064400000061415147333266670012376 0ustar00`. * * > DOCTYPEs are required for legacy reasons. When omitted, browsers tend to use a different * > rendering mode that is incompatible with some specifications. Including the DOCTYPE in a * > document ensures that the browser makes a best-effort attempt at following the * > relevant specifications. * * @see https://html.spec.whatwg.org/#the-doctype * * DOCTYPE declarations comprise four properties: a name, public identifier, system identifier, * and an indication of which document compatability mode they would imply if an HTML parser * hadn't already determined it from other information. * * @see https://html.spec.whatwg.org/#the-initial-insertion-mode * * Historically, the DOCTYPE declaration was used in SGML documents to instruct a parser how * to interpret the various tags and entities within a document. Its role in HTML diverged * from how it was used in SGML and no meaning should be back-read into HTML based on how it * is used in SGML, XML, or XHTML documents. * * @see https://www.iso.org/standard/16387.html * * @since 6.7.0 * * @see WP_HTML_Processor */ class WP_HTML_Doctype_Info { /** * Name of the DOCTYPE: should be "html" for HTML documents. * * This value should be considered "read only" and not modified. * * Historically the DOCTYPE name indicates name of the document's root element. * * * ╰──┴── name is "html". * * @see https://html.spec.whatwg.org/#tokenization * * @since 6.7.0 * * @var string|null */ public $name = null; /** * Public identifier of the DOCTYPE. * * This value should be considered "read only" and not modified. * * The public identifier is optional and should not appear in HTML documents. * A `null` value indicates that no public identifier was present in the DOCTYPE.
* * Historically the presence of the public identifier indicated that a document * was meant to be shared between computer systems and the value indicated to a * knowledgeable parser how to find the relevant document type definition (DTD). * * * │ │ ╰─── public identifier ─────╯ * ╰──┴── name is "html". * * @see https://html.spec.whatwg.org/#tokenization * * @since 6.7.0 * * @var string|null */ public $public_identifier = null; /** * System identifier of the DOCTYPE. * * This value should be considered "read only" and not modified. * * The system identifier is optional and should not appear in HTML documents. * A `null` value indicates that no system identifier was present in the DOCTYPE. * * Historically the system identifier specified where a relevant document type * declaration for the given document is stored and may be retrieved. * * * │ │ ╰──── system identifier ────╯ * ╰──┴── name is "html". * * If a public identifier were provided it would indicate to a knowledgeable * parser how to interpret the system identifier. * * * │ │ ╰─── public identifier ─────╯ ╰──── system identifier ────╯ * ╰──┴── name is "html". * * @see https://html.spec.whatwg.org/#tokenization * * @since 6.7.0 * * @var string|null */ public $system_identifier = null; /** * Which document compatability mode this DOCTYPE declaration indicates. * * This value should be considered "read only" and not modified. * * When an HTML parser has not already set the document compatability mode, * (e.g. "quirks" or "no-quirks" mode), it will infer if from the properties * of the appropriate DOCTYPE declaration, if one exists. The DOCTYPE can * indicate one of three possible document compatability modes: * * - "no-quirks" and "limited-quirks" modes (also called "standards" mode). * - "quirks" mode (also called `CSS1Compat` mode). * * An appropriate DOCTYPE is one encountered in the "initial" insertion mode, * before the HTML element has been opened and before finding any other * DOCTYPE declaration tokens. 
*
	 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode
	 *
	 * @since 6.7.0
	 *
	 * @var string One of "no-quirks", "limited-quirks", or "quirks".
	 */
	public $indicated_compatability_mode;

	/**
	 * Constructor.
	 *
	 * This class should not be instantiated directly.
	 * Use the static {@see self::from_doctype_token} method instead.
	 *
	 * The arguments to this constructor correspond to the "DOCTYPE token"
	 * as defined in the HTML specification.
	 *
	 * > DOCTYPE tokens have a name, a public identifier, a system identifier,
	 * > and a force-quirks flag. When a DOCTYPE token is created, its name, public identifier,
	 * > and system identifier must be marked as missing (which is a distinct state from the
	 * > empty string), and the force-quirks flag must be set to off (its other state is on).
	 *
	 * Besides storing the three token fields unchanged, the constructor derives
	 * `$indicated_compatability_mode` by walking the conditions of the "initial"
	 * insertion mode in order; the first matching condition decides the mode.
	 *
	 * @see https://html.spec.whatwg.org/multipage/parsing.html#tokenization
	 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode
	 *
	 * @since 6.7.0
	 *
	 * @param string|null $name              Name of the DOCTYPE.
	 * @param string|null $public_identifier Public identifier of the DOCTYPE.
	 * @param string|null $system_identifier System identifier of the DOCTYPE.
	 * @param bool        $force_quirks_flag Whether the force-quirks flag is set for the token.
	 */
	private function __construct( ?string $name, ?string $public_identifier, ?string $system_identifier, bool $force_quirks_flag ) {
		// The stored properties keep the original casing and the null-vs-empty distinction.
		$this->name              = $name;
		$this->public_identifier = $public_identifier;
		$this->system_identifier = $system_identifier;

		/*
		 * > If the DOCTYPE token matches one of the conditions in the following list,
		 * > then set the Document to quirks mode:
		 */

		/*
		 * > The force-quirks flag is set to on.
		 */
		if ( $force_quirks_flag ) {
			$this->indicated_compatability_mode = 'quirks';
			return;
		}

		/*
		 * A DOCTYPE named "html" with neither a public nor a system identifier
		 * cannot match any quirks condition; short-circuit to avoid extra parsing.
		 */
		if ( 'html' === $name && null === $public_identifier && null === $system_identifier ) {
			$this->indicated_compatability_mode = 'no-quirks';
			return;
		}

		/*
		 * > The name is not "html".
		 *
		 * The tokenizer must report the name in lower case even if provided in
		 * the document in upper case; thus no conversion is required here.
		 */
		if ( 'html' !== $name ) {
			$this->indicated_compatability_mode = 'quirks';
			return;
		}

		/*
		 * Set up some variables to handle the rest of the conditions.
		 *
		 * > set...the public identifier...to...the empty string if the public identifier was missing.
		 * > set...the system identifier...to...the empty string if the system identifier was missing.
		 * >
		 * > The system identifier and public identifier strings must be compared...
		 * > in an ASCII case-insensitive manner.
		 * >
		 * > A system identifier whose value is the empty string is not considered missing
		 * > for the purposes of the conditions above.
		 *
		 * "Missing" must therefore be captured before null is collapsed to the empty string.
		 */
		$system_identifier_is_missing = null === $system_identifier;
		$public_identifier            = null === $public_identifier ? '' : strtolower( $public_identifier );
		$system_identifier            = null === $system_identifier ? '' : strtolower( $system_identifier );

		/*
		 * > The public identifier is set to…
		 */
		if (
			'-//w3o//dtd w3 html strict 3.0//en//' === $public_identifier ||
			'-/w3c/dtd html 4.0 transitional/en' === $public_identifier ||
			'html' === $public_identifier
		) {
			$this->indicated_compatability_mode = 'quirks';
			return;
		}

		/*
		 * > The system identifier is set to…
		 */
		if ( 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd' === $system_identifier ) {
			$this->indicated_compatability_mode = 'quirks';
			return;
		}

		/*
		 * All of the following conditions depend on matching the public identifier.
		 * If the public identifier is empty, none of the following conditions will match.
		 */
		if ( '' === $public_identifier ) {
			$this->indicated_compatability_mode = 'no-quirks';
			return;
		}

		/*
		 * > The public identifier starts with…
		 *
		 * Every entry is lower case because the public identifier was lower-cased above.
		 * Each prefix must sit on a single line: a line break inside one of these
		 * literals would become part of the string and the prefix would never match.
		 *
		 * Only a single DOCTYPE declaration token should ever be parsed per document,
		 * so a linear scan over this table is not a performance concern.
		 */
		$quirks_prefixes = array(
			'+//silmaril//dtd html pro v0r11 19970101//',
			'-//as//dtd html 3.0 aswedit + extensions//',
			'-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
			'-//ietf//dtd html 2.0 level 1//',
			'-//ietf//dtd html 2.0 level 2//',
			'-//ietf//dtd html 2.0 strict level 1//',
			'-//ietf//dtd html 2.0 strict level 2//',
			'-//ietf//dtd html 2.0 strict//',
			'-//ietf//dtd html 2.0//',
			'-//ietf//dtd html 2.1e//',
			'-//ietf//dtd html 3.0//',
			'-//ietf//dtd html 3.2 final//',
			'-//ietf//dtd html 3.2//',
			'-//ietf//dtd html 3//',
			'-//ietf//dtd html level 0//',
			'-//ietf//dtd html level 1//',
			'-//ietf//dtd html level 2//',
			'-//ietf//dtd html level 3//',
			'-//ietf//dtd html strict level 0//',
			'-//ietf//dtd html strict level 1//',
			'-//ietf//dtd html strict level 2//',
			'-//ietf//dtd html strict level 3//',
			'-//ietf//dtd html strict//',
			'-//ietf//dtd html//',
			'-//metrius//dtd metrius presentational//',
			'-//microsoft//dtd internet explorer 2.0 html strict//',
			'-//microsoft//dtd internet explorer 2.0 html//',
			'-//microsoft//dtd internet explorer 2.0 tables//',
			'-//microsoft//dtd internet explorer 3.0 html strict//',
			'-//microsoft//dtd internet explorer 3.0 html//',
			'-//microsoft//dtd internet explorer 3.0 tables//',
			'-//netscape comm. corp.//dtd html//',
			'-//netscape comm. corp.//dtd strict html//',
			"-//o'reilly and associates//dtd html 2.0//",
			"-//o'reilly and associates//dtd html extended 1.0//",
			"-//o'reilly and associates//dtd html extended relaxed 1.0//",
			'-//sq//dtd html 2.0 hotmetal + extensions//',
			'-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
			'-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
			'-//spyglass//dtd html 2.0 extended//',
			'-//sun microsystems corp.//dtd hotjava html//',
			'-//sun microsystems corp.//dtd hotjava strict html//',
			'-//w3c//dtd html 3 1995-03-24//',
			'-//w3c//dtd html 3.2 draft//',
			'-//w3c//dtd html 3.2 final//',
			'-//w3c//dtd html 3.2//',
			'-//w3c//dtd html 3.2s draft//',
			'-//w3c//dtd html 4.0 frameset//',
			'-//w3c//dtd html 4.0 transitional//',
			'-//w3c//dtd html experimental 19960712//',
			'-//w3c//dtd html experimental 970421//',
			'-//w3c//dtd w3 html//',
			'-//w3o//dtd w3 html 3.0//',
			'-//webtechs//dtd mozilla html 2.0//',
			'-//webtechs//dtd mozilla html//',
		);

		foreach ( $quirks_prefixes as $quirks_prefix ) {
			if ( str_starts_with( $public_identifier, $quirks_prefix ) ) {
				$this->indicated_compatability_mode = 'quirks';
				return;
			}
		}

		/*
		 * > The system identifier is missing and the public identifier starts with…
		 *
		 * and, from the limited-quirks list,
		 *
		 * > The system identifier is not missing and the public identifier starts with…
		 *
		 * Both conditions test the same two HTML 4.01 prefixes, and those prefixes
		 * cannot overlap with the XHTML 1.0 prefixes tested below, so they are
		 * decided together here without changing the outcome.
		 */
		$is_html_401_frameset_or_transitional = (
			str_starts_with( $public_identifier, '-//w3c//dtd html 4.01 frameset//' ) ||
			str_starts_with( $public_identifier, '-//w3c//dtd html 4.01 transitional//' )
		);

		if ( $is_html_401_frameset_or_transitional ) {
			$this->indicated_compatability_mode = $system_identifier_is_missing ? 'quirks' : 'limited-quirks';
			return;
		}

		/*
		 * > Otherwise, if the DOCTYPE token matches one of the conditions in
		 * > the following list, then set the Document to limited-quirks mode.
		 */

		/*
		 * > The public identifier starts with…
		 */
		if (
			str_starts_with( $public_identifier, '-//w3c//dtd xhtml 1.0 frameset//' ) ||
			str_starts_with( $public_identifier, '-//w3c//dtd xhtml 1.0 transitional//' )
		) {
			$this->indicated_compatability_mode = 'limited-quirks';
			return;
		}

		// No quirks or limited-quirks condition matched.
		$this->indicated_compatability_mode = 'no-quirks';
	}

	/**
	 * Creates a WP_HTML_Doctype_Info instance by parsing a raw DOCTYPE declaration token.
	 *
	 * Use this method to parse a DOCTYPE declaration token and get access to its properties
	 * via the returned WP_HTML_Doctype_Info class instance.
The provided input must parse properly as a DOCTYPE declaration, though it need not
	 * represent a valid DOCTYPE.
	 *
	 * Example:
	 *
	 *     // Normative HTML DOCTYPE declaration.
	 *     $doctype = WP_HTML_Doctype_Info::from_doctype_token( '<!DOCTYPE html>' );
	 *     'no-quirks' === $doctype->indicated_compatability_mode;
	 *
	 *     // A nonsensical DOCTYPE is still valid, and will indicate "quirks" mode.
	 *     $doctype = WP_HTML_Doctype_Info::from_doctype_token( '<!doctypeJSON SILLY "nonsense\'>' );
	 *     'quirks' === $doctype->indicated_compatability_mode;
	 *
	 *     // Textual quirks present in raw HTML are handled appropriately.
	 *     $doctype = WP_HTML_Doctype_Info::from_doctype_token( "<!DOCTYPE\nhtml\n>" );
	 *     'no-quirks' === $doctype->indicated_compatability_mode;
	 *
	 *     // Anything other than a proper DOCTYPE declaration token fails to parse.
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( ' <!DOCTYPE>' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( '<!DOCTYPE ><p>' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( '<!DOCTYPE' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( 'html' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( '<?xml version="1.0" encoding="UTF-8" ?>' );
	 *
	 * @since 6.7.0
	 *
	 * @param string $doctype_html The complete raw DOCTYPE HTML string, e.g. `<!DOCTYPE html>`.
	 *
	 * @return WP_HTML_Doctype_Info|null A WP_HTML_Doctype_Info instance will be returned if the
	 *                                   provided DOCTYPE HTML is a valid DOCTYPE. Otherwise, null.
	 */
	public static function from_doctype_token( string $doctype_html ): ?self {
		$doctype_name      = null;
		$doctype_public_id = null;
		$doctype_system_id = null;

		// Index of the final byte, which must be the closing `>`.
		$end = strlen( $doctype_html ) - 1;

		/*
		 * This parser combines the rules for parsing DOCTYPE tokens found in the HTML
		 * specification for the DOCTYPE related tokenizer states.
		 *
		 * @see https://html.spec.whatwg.org/#doctype-state
		 */

		/*
		 * - Valid DOCTYPE HTML token must be at least `<!DOCTYPE>` assuming a complete token not
		 *   ending in end-of-file.
		 * - It must start with an ASCII case-insensitive match for `<!DOCTYPE`.
		 * - The only occurrence of `>` must be the final byte in the HTML string.
		 */
		if (
			$end < 9 ||
			0 !== substr_compare( $doctype_html, '<!DOCTYPE', 0, 9, true )
		) {
			return null;
		}

		// Parsing position: the first byte after the nine-byte `<!DOCTYPE` prefix.
		$at = 9;

		// Is there one and only one `>`?
		if ( '>' !== $doctype_html[ $end ] || ( strcspn( $doctype_html, '>', $at ) + $at ) < $end ) {
			return null;
		}

		/*
		 * Perform newline normalization and ensure the $end value is correct after normalization.
		 *
		 * @see https://html.spec.whatwg.org/#preprocessing-the-input-stream
		 * @see https://infra.spec.whatwg.org/#normalize-newlines
		 */
		$doctype_html = str_replace( "\r\n", "\n", $doctype_html );
		$doctype_html = str_replace( "\r", "\n", $doctype_html );
		$end          = strlen( $doctype_html ) - 1;

		/*
		 * In this state, the doctype token has been found and its "content" optionally including the
		 * name, public identifier, and system identifier is between the current position and the end.
		 *
		 *     "<!DOCTYPE...declaration...>"
		 *               ╰─ $at           ╰─ $end
		 *
		 * It's also possible that the declaration part is empty.
		 *
		 *               ╭─ $at
		 *     "<!DOCTYPE>"
		 *               ╰─ $end
		 *
		 * Rules for parsing ">" which terminates the DOCTYPE do not need to be considered as they
		 * have been handled above in the condition that the provided DOCTYPE HTML must contain
		 * exactly one ">" character in the final position.
		 */

		/*
		 * Parsing effectively begins in "Before DOCTYPE name state". Ignore whitespace and
		 * proceed to the next state.
		 *
		 * @see https://html.spec.whatwg.org/#before-doctype-name-state
		 */
		$at += strspn( $doctype_html, " \t\n\f\r", $at );

		// Nothing but whitespace before the closing `>`: the name is missing, which forces quirks.
		if ( $at >= $end ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
		}

		// The name runs up to the next whitespace; it is lower-cased and NUL becomes U+FFFD.
		$name_length  = strcspn( $doctype_html, " \t\n\f\r", $at, $end - $at );
		$doctype_name = str_replace( "\0", "\u{FFFD}", strtolower( substr( $doctype_html, $at, $name_length ) ) );

		$at += $name_length;
		$at += strspn( $doctype_html, " \t\n\f\r", $at, $end - $at );
		if ( $at >= $end ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, false );
		}

		/*
		 * "After DOCTYPE name state"
		 *
		 * Find a case-insensitive match for "PUBLIC" or "SYSTEM" at this point.
		 * Otherwise, set force-quirks and enter bogus DOCTYPE state (skip the rest of the doctype).
		 *
		 * @see https://html.spec.whatwg.org/#after-doctype-name-state
		 */
		// Too short to hold a six-byte keyword followed by an identifier.
		if ( $at + 6 >= $end ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
		}

		/*
		 * > If the six characters starting from the current input character are an ASCII
		 * > case-insensitive match for the word "PUBLIC", then consume those characters
		 * > and switch to the after DOCTYPE public keyword state.
		 */
		if ( 0 === substr_compare( $doctype_html, 'PUBLIC', $at, 6, true ) ) {
			$at += 6;
			$at += strspn( $doctype_html, " \t\n\f\r", $at, $end - $at );
			if ( $at >= $end ) {
				return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
			}
			goto parse_doctype_public_identifier;
		}

		/*
		 * > Otherwise, if the six characters starting from the current input character are an ASCII
		 * > case-insensitive match for the word "SYSTEM", then consume those characters and switch
		 * > to the after DOCTYPE system keyword state.
		 */
		if ( 0 === substr_compare( $doctype_html, 'SYSTEM', $at, 6, true ) ) {
			$at += 6;
			$at += strspn( $doctype_html, " \t\n\f\r", $at, $end - $at );
			if ( $at >= $end ) {
				return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
			}
			goto parse_doctype_system_identifier;
		}

		/*
		 * > Otherwise, this is an invalid-character-sequence-after-doctype-name parse error.
		 * > Set the current DOCTYPE token's force-quirks flag to on. Reconsume in the bogus
		 * > DOCTYPE state.
		 */
		return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );

		parse_doctype_public_identifier:
		/*
		 * The parser should enter "DOCTYPE public identifier (double-quoted) state" or
		 * "DOCTYPE public identifier (single-quoted) state" by finding one of the valid quotes.
		 * Anything else forces quirks mode and ignores the rest of the contents.
		 *
		 * @see https://html.spec.whatwg.org/#doctype-public-identifier-(double-quoted)-state
		 * @see https://html.spec.whatwg.org/#doctype-public-identifier-(single-quoted)-state
		 */
		$closer_quote = $doctype_html[ $at ];

		/*
		 * > This is a missing-quote-before-doctype-public-identifier parse error. Set the
		 * > current DOCTYPE token's force-quirks flag to on. Reconsume in the bogus DOCTYPE state.
		 */
		if ( '"' !== $closer_quote && "'" !== $closer_quote ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
		}

		++$at;

		$identifier_length = strcspn( $doctype_html, $closer_quote, $at, $end - $at );
		$doctype_public_id = str_replace( "\0", "\u{FFFD}", substr( $doctype_html, $at, $identifier_length ) );

		$at += $identifier_length;

		// An unterminated identifier is still recorded, but it forces quirks mode.
		if ( $at >= $end || $closer_quote !== $doctype_html[ $at ] ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
		}

		++$at;

		/*
		 * "Between DOCTYPE public and system identifiers state"
		 *
		 * Advance through whitespace between public and system identifiers.
		 *
		 * @see https://html.spec.whatwg.org/#between-doctype-public-and-system-identifiers-state
		 */
		$at += strspn( $doctype_html, " \t\n\f\r", $at, $end - $at );
		if ( $at >= $end ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, false );
		}

		parse_doctype_system_identifier:
		/*
		 * The parser should enter "DOCTYPE system identifier (double-quoted) state" or
		 * "DOCTYPE system identifier (single-quoted) state" by finding one of the valid quotes.
		 * Anything else forces quirks mode and ignores the rest of the contents.
		 *
		 * @see https://html.spec.whatwg.org/#doctype-system-identifier-(double-quoted)-state
		 * @see https://html.spec.whatwg.org/#doctype-system-identifier-(single-quoted)-state
		 */
		$closer_quote = $doctype_html[ $at ];

		/*
		 * > This is a missing-quote-before-doctype-system-identifier parse error. Set the
		 * > current DOCTYPE token's force-quirks flag to on. Reconsume in the bogus DOCTYPE state.
		 */
		if ( '"' !== $closer_quote && "'" !== $closer_quote ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
		}

		++$at;

		$identifier_length = strcspn( $doctype_html, $closer_quote, $at, $end - $at );
		$doctype_system_id = str_replace( "\0", "\u{FFFD}", substr( $doctype_html, $at, $identifier_length ) );

		$at += $identifier_length;

		// An unterminated identifier is still recorded, but it forces quirks mode.
		if ( $at >= $end || $closer_quote !== $doctype_html[ $at ] ) {
			return new self( $doctype_name, $doctype_public_id, $doctype_system_id, true );
		}

		// Anything after the closing quote is ignored and does not force quirks mode.
		return new self( $doctype_name, $doctype_public_id, $doctype_system_id, false );
	}
}
class-wp-html-processor.php000064400000616340147333266670012020 0ustar00next_tag( array( 'breadcrumbs' => array( 'DIV', 'FIGURE', 'IMG' ) ) ) ) { * $processor->add_class( 'responsive-image' ); * } * * #### Breadcrumbs * * Breadcrumbs represent the stack of open elements from the root * of the document or fragment down to the currently-matched node, * if one is currently selected. Call WP_HTML_Processor::get_breadcrumbs() * to inspect the breadcrumbs for a matched tag. * * Breadcrumbs can specify nested HTML structure and are equivalent * to a CSS selector comprising tag names separated by the child * combinator, such as "DIV > FIGURE > IMG". * * Since all elements find themselves inside a full HTML document * when parsed, the return value from `get_breadcrumbs()` will always * contain any implicit outermost elements. For example, when parsing * with `create_fragment()` in the `BODY` context (the default), any * tag in the given HTML document will contain `array( 'HTML', 'BODY', … )` * in its breadcrumbs. * * Despite containing the implied outermost elements in their breadcrumbs, * tags may be found with the shortest-matching breadcrumb query. That is, * `array( 'IMG' )` matches all IMG elements and `array( 'P', 'IMG' )` * matches all IMG elements directly inside a P element. 
To ensure that no * partial matches erroneously match it's possible to specify in a query * the full breadcrumb match all the way down from the root HTML element. * * Example: * * $html = '

A lovely day outside
'; * // ----- Matches here. * $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG' ) ) ); * * $html = '
A lovely day outside
'; * // ---- Matches here. * $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'FIGCAPTION', 'EM' ) ) ); * * $html = '
'; * // ----- Matches here, because IMG must be a direct child of the implicit BODY. * $processor->next_tag( array( 'breadcrumbs' => array( 'BODY', 'IMG' ) ) ); * * ## HTML Support * * This class implements a small part of the HTML5 specification. * It's designed to operate within its support and abort early whenever * encountering circumstances it can't properly handle. This is * the principal way in which this class remains as simple as possible * without cutting corners and breaking compliance. * * ### Supported elements * * If any unsupported element appears in the HTML input the HTML Processor * will abort early and stop all processing. This draconian measure ensures * that the HTML Processor won't break any HTML it doesn't fully understand. * * The HTML Processor supports all elements other than a specific set: * * - Any element inside a TABLE. * - Any element inside foreign content, including SVG and MATH. * - Any element outside the IN BODY insertion mode, e.g. doctype declarations, meta, links. * * ### Supported markup * * Some kinds of non-normative HTML involve reconstruction of formatting elements and * re-parenting of mis-nested elements. For example, a DIV tag found inside a TABLE * may in fact belong _before_ the table in the DOM. If the HTML Processor encounters * such a case it will stop processing. * * The following list illustrates some common examples of unexpected HTML inputs that * the HTML Processor properly parses and represents: * * - HTML with optional tags omitted, e.g. `

one

two`. * - HTML with unexpected tag closers, e.g. `

one more

`. * - Non-void tags with self-closing flag, e.g. `
the DIV is still open.
`. * - Heading elements which close open heading elements of another level, e.g. `

Closed by

`. * - Elements containing text that looks like other tags but isn't, e.g. `The <img> is plaintext`. * - SCRIPT and STYLE tags containing text that looks like HTML but isn't, e.g. ``. * - SCRIPT content which has been escaped, e.g. ``. * * ### Unsupported Features * * This parser does not report parse errors. * * Normally, when additional HTML or BODY tags are encountered in a document, if there * are any additional attributes on them that aren't found on the previous elements, * the existing HTML and BODY elements adopt those missing attribute values. This * parser does not add those additional attributes. * * In certain situations, elements are moved to a different part of the document in * a process called "adoption" and "fostering." Because the nodes move to a location * in the document that the parser had already processed, this parser does not support * these situations and will bail. * * @since 6.4.0 * * @see WP_HTML_Tag_Processor * @see https://html.spec.whatwg.org/ */ class WP_HTML_Processor extends WP_HTML_Tag_Processor { /** * The maximum number of bookmarks allowed to exist at any given time. * * HTML processing requires more bookmarks than basic tag processing, * so this class constant from the Tag Processor is overwritten. * * @since 6.4.0 * * @var int */ const MAX_BOOKMARKS = 100; /** * Holds the working state of the parser, including the stack of * open elements and the stack of active formatting elements. * * Initialized in the constructor. * * @since 6.4.0 * * @var WP_HTML_Processor_State */ private $state; /** * Used to create unique bookmark names. * * This class sets a bookmark for every tag in the HTML document that it encounters. * The bookmark name is auto-generated and increments, starting with `1`. These are * internal bookmarks and are automatically released when the referring WP_HTML_Token * goes out of scope and is garbage-collected. 
* * @since 6.4.0 * * @see WP_HTML_Processor::$release_internal_bookmark_on_destruct * * @var int */ private $bookmark_counter = 0; /** * Stores an explanation for why something failed, if it did. * * @see self::get_last_error * * @since 6.4.0 * * @var string|null */ private $last_error = null; /** * Stores context for why the parser bailed on unsupported HTML, if it did. * * @see self::get_unsupported_exception * * @since 6.7.0 * * @var WP_HTML_Unsupported_Exception|null */ private $unsupported_exception = null; /** * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance. * * This function is created inside the class constructor so that it can be passed to * the stack of open elements and the stack of active formatting elements without * exposing it as a public method on the class. * * @since 6.4.0 * * @var Closure|null */ private $release_internal_bookmark_on_destruct = null; /** * Stores stack events which arise during parsing of the * HTML document, which will then supply the "match" events. * * @since 6.6.0 * * @var WP_HTML_Stack_Event[] */ private $element_queue = array(); /** * Stores the current breadcrumbs. * * @since 6.7.0 * * @var string[] */ private $breadcrumbs = array(); /** * Current stack event, if set, representing a matched token. * * Because the parser may internally point to a place further along in a document * than the nodes which have already been processed (some "virtual" nodes may have * appeared while scanning the HTML document), this will point at the "current" node * being processed. It comes from the front of the element queue. * * @since 6.6.0 * * @var WP_HTML_Stack_Event|null */ private $current_element = null; /** * Context node if created as a fragment parser. * * @var WP_HTML_Token|null */ private $context_node = null; /* * Public Interface Functions */ /** * Creates an HTML processor in the fragment parsing mode. 
*
	 * Intended for chunks of HTML that will end up inside a larger HTML
	 * document, such as rendered block output within a post, or
	 * `the_content` inside a rendered site layout.
	 *
	 * A fragment is parsed relative to a context: the HTML element the
	 * fragment will eventually be placed in. The context matters because
	 * some elements follow special rules, e.g. inside a TEXTAREA or TITLE
	 * things that look like tags are text, and inside a SCRIPT things that
	 * look like HTML syntax are JS.
	 *
	 * The context is given in its HTML form because a context element may
	 * carry attributes that impact the parse, such as a SCRIPT tag and its
	 * `type` attribute. For most cases it will be the body element.
	 *
	 * ## Current HTML Support
	 *
	 *  - Only the default value of `$context` is accepted.
	 *  - Only the default value of `$encoding`, `UTF-8`, is accepted.
	 *
	 * Any other argument makes this method return `null`.
	 *
	 * NOTE(review): the context literal is the empty string both in the signature
	 * and in the guard below, although the fragment is set up with a BODY context
	 * node; confirm the literal was not lost when this file was extracted.
	 *
	 * @since 6.4.0
	 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances.
	 *
	 * @param string $html     Input HTML fragment to process.
	 * @param string $context  Context element for the fragment; must be the default value.
	 * @param string $encoding Text encoding of the document; must be default of 'UTF-8'.
	 * @return static|null The created processor if successful, otherwise null.
	 */
	public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) {
		// Reject every context other than the supported default.
		if ( '' !== $context ) {
			return null;
		}

		// Reject every encoding other than UTF-8.
		if ( 'UTF-8' !== $encoding ) {
			return null;
		}

		$fragment_parser = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
		$parser_state    = $fragment_parser->state;

		// Start out as if a BODY element were already open around the fragment.
		$parser_state->context_node        = array( 'BODY', array() );
		$parser_state->insertion_mode      = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
		$parser_state->encoding            = $encoding;
		$parser_state->encoding_confidence = 'certain';

		// @todo Create "fake" bookmarks for non-existent but implied nodes.
		$fragment_parser->bookmarks['root-node']    = new WP_HTML_Span( 0, 0 );
		$fragment_parser->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );

		// The implied HTML root goes onto the stack of open elements.
		$parser_state->stack_of_open_elements->push(
			new WP_HTML_Token( 'root-node', 'HTML', false )
		);

		// The context node is remembered on the processor and seeded into the breadcrumbs.
		$context_token                 = new WP_HTML_Token( 'context-node', $parser_state->context_node[0], false );
		$fragment_parser->context_node = $context_token;
		$fragment_parser->breadcrumbs  = array( 'HTML', $context_token->node_name );

		return $fragment_parser;
	}

	/**
	 * Creates an HTML processor in the full parsing mode.
	 *
	 * Unless an entire HTML document is being processed from start to
	 * finish, a fragment parser is likely the more appropriate choice.
	 *
	 * UTF-8 is the only charset currently accepted. A document in any other
	 * encoding must be converted first, and the converted HTML passed in.
	 *
	 * @param string      $html                    Input HTML document to process.
	 * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used
	 *                                             in the input byte stream. Currently must be UTF-8.
	 * @return static|null The created processor if successful, otherwise null.
	 */
	public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) {
		$is_supported_encoding = 'UTF-8' === $known_definite_encoding;
		if ( ! $is_supported_encoding ) {
			return null;
		}

		$full_parser  = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
		$parser_state = $full_parser->state;

		// The caller vouched for the encoding, so it is recorded as certain.
		$parser_state->encoding            = $known_definite_encoding;
		$parser_state->encoding_confidence = 'certain';

		return $full_parser;
	}

	/**
	 * Constructor.
	 *
	 * Do not use this method. Use the static creator methods instead.
	 *
	 * @access private
	 *
	 * @since 6.4.0
	 *
	 * @see WP_HTML_Processor::create_fragment()
	 *
	 * @param string      $html                                  HTML to process.
	 * @param string|null $use_the_static_create_methods_instead This constructor should not be called manually.
*/
	public function __construct( $html, $use_the_static_create_methods_instead = null ) {
		parent::__construct( $html );

		// Direct construction is reported as incorrect usage, but construction still proceeds.
		if ( self::CONSTRUCTOR_UNLOCK_CODE !== $use_the_static_create_methods_instead ) {
			$wrong_usage_message = sprintf(
				/* translators: %s: WP_HTML_Processor::create_fragment(). */
				__( 'Call %s to create an HTML Processor instead of calling the constructor directly.' ),
				'WP_HTML_Processor::create_fragment()'
			);

			_doing_it_wrong( __METHOD__, $wrong_usage_message, '6.4.0' );
		}

		$this->state = new WP_HTML_Processor_State();

		/*
		 * Each push onto the stack of open elements is recorded as a stack event.
		 * The event is "real" only if there is a current token, it is not a tag
		 * closer, and it has the same node name as the pushed token; every other
		 * push is "virtual".
		 */
		$this->state->stack_of_open_elements->set_push_handler(
			function ( WP_HTML_Token $token ): void {
				$has_current_token = isset( $this->state->current_token );
				$is_virtual        = ! $has_current_token || $this->is_tag_closer();
				$same_node         = $has_current_token && $token->node_name === $this->state->current_token->node_name;

				if ( $same_node && ! $is_virtual ) {
					$provenance = 'real';
				} else {
					$provenance = 'virtual';
				}

				$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );

				// A token with an integration node type is parsed as HTML; otherwise its own namespace applies.
				if ( $token->integration_node_type ) {
					$parsing_namespace = 'html';
				} else {
					$parsing_namespace = $token->namespace;
				}

				$this->change_parsing_namespace( $parsing_namespace );
			}
		);

		/*
		 * Each pop is recorded the same way, except that the event is "real" only
		 * if the current token IS a tag closer with the same node name as the
		 * popped token.
		 */
		$this->state->stack_of_open_elements->set_pop_handler(
			function ( WP_HTML_Token $token ): void {
				$has_current_token = isset( $this->state->current_token );
				$is_virtual        = ! $has_current_token || ! $this->is_tag_closer();
				$same_node         = $has_current_token && $token->node_name === $this->state->current_token->node_name;

				if ( $same_node && ! $is_virtual ) {
					$provenance = 'real';
				} else {
					$provenance = 'virtual';
				}

				$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );

				// After a pop the namespace follows the adjusted current node, defaulting to HTML.
				$parsing_namespace     = 'html';
				$adjusted_current_node = $this->get_adjusted_current_node();
				if ( $adjusted_current_node && ! $adjusted_current_node->integration_node_type ) {
					$parsing_namespace = $adjusted_current_node->namespace;
				}

				$this->change_parsing_namespace( $parsing_namespace );
			}
		);

		/*
		 * This closure wraps the parent's release_bookmark() so that it can be
		 * handed to WP_HTML_Token instances as a callable, without adding
		 * anything to the public API of this class.
		 */
		$this->release_internal_bookmark_on_destruct = function ( string $name ): void {
			parent::release_bookmark( $name );
		};
	}

	/**
	 * Stops the parser and terminates its execution when encountering unsupported markup.
	 *
	 * Before throwing, this records `self::ERROR_UNSUPPORTED` as the last error and
	 * stores the exception on the processor. The exception carries the message, the
	 * current token's node name, the start offset and raw text of the token's
	 * bookmarked span, and the node names found in the stack of open elements and
	 * in the active formatting elements.
	 *
	 * @throws WP_HTML_Unsupported_Exception Halts execution of the parser.
	 *
	 * @since 6.7.0
	 *
	 * @param string $message Explains support is missing in order to parse the current node.
	 */
	private function bail( string $message ) {
		$current_token = $this->state->current_token;

		// Find the raw text of the current token through its bookmark.
		$token_span = $this->bookmarks[ $current_token->bookmark_name ];
		$token_html = substr( $this->html, $token_span->start, $token_span->length );

		// Node names in the stack of open elements, in the order the stack array holds them.
		$open_element_names = array();
		foreach ( $this->state->stack_of_open_elements->stack as $open_element ) {
			$open_element_names[] = $open_element->node_name;
		}

		// Node names of the active formatting elements, in walk_down() order.
		$active_format_names = array();
		foreach ( $this->state->active_formatting_elements->walk_down() as $formatting_element ) {
			$active_format_names[] = $formatting_element->node_name;
		}

		$this->last_error = self::ERROR_UNSUPPORTED;

		$this->unsupported_exception = new WP_HTML_Unsupported_Exception(
			$message,
			$current_token->node_name,
			$token_span->start,
			$token_html,
			$open_element_names,
			$active_format_names
		);

		throw $this->unsupported_exception;
	}

	/**
	 * Returns the last error, if any.
	 *
	 * Various situations lead to parsing failure but this class will
	 * return `false` in all those cases. To determine why something
	 * failed it's possible to request the last error. This can be
	 * helpful to know to distinguish whether a given tag couldn't
	 * be found or if content in the document caused the processor
	 * to give up and abort processing.
	 *
	 * Example
	 *
	 *     $processor = WP_HTML_Processor::create_fragment( '