diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 541b5baf0b..79d96dc8be 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -242,6 +242,8 @@ * unquoted values will appear in the output with double-quotes. * * @since 6.2.0 + * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive. + * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE. */ class WP_HTML_Tag_Processor { /** @@ -568,7 +570,14 @@ class WP_HTML_Tag_Processor { * of the tag name as a pre-check avoids a string allocation when it's not needed. */ $t = $this->html[ $this->tag_name_starts_at ]; - if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) { + if ( + ! $this->is_closing_tag && + ( + 'i' === $t || 'I' === $t || + 'n' === $t || 'N' === $t || + 's' === $t || 'S' === $t || + 't' === $t || 'T' === $t + ) ) { $tag_name = $this->get_tag(); if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { @@ -580,6 +589,25 @@ class WP_HTML_Tag_Processor { ) { $this->bytes_already_parsed = strlen( $this->html ); return false; + } elseif ( + ( + 'IFRAME' === $tag_name || + 'NOEMBED' === $tag_name || + 'NOFRAMES' === $tag_name || + 'NOSCRIPT' === $tag_name || + 'STYLE' === $tag_name + ) && + ! $this->skip_rawtext( $tag_name ) + ) { + /* + * "XMP" should be here too but its rules are more complicated and require the + * complexity of the HTML Processor (it needs to close out any open P element, + * meaning it can't be skipped here or else the HTML Processor will lose its + * place). For now, it can be ignored as it's a rare HTML tag in practice and + * any normative HTML should be using PRE instead. 
+ */ + $this->bytes_already_parsed = strlen( $this->html ); + return false; } } } while ( $already_found < $this->sought_match_offset ); @@ -710,15 +738,33 @@ class WP_HTML_Tag_Processor { return true; } + /** + * Skips contents of generic RAWTEXT elements by delegating to skip_rcdata(). + * + * @since 6.3.2 + * + * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm + * + * @param string $tag_name The uppercase tag name which will close the RAWTEXT region. + * @return bool Whether an end to the RAWTEXT region was found before the end of the document. + */ + private function skip_rawtext( $tag_name ) { + /* + * skip_rawtext() and skip_rcdata() distinguish themselves on whether character references are + * decoded, and since functionality to read the inner markup isn't supported, it's + * not necessary to implement them separately; the RCDATA skipper is reused as-is. + */ + return $this->skip_rcdata( $tag_name ); + } /** - * Skips contents of title and textarea tags. + * Skips contents of RCDATA elements, namely title and textarea tags. * * @since 6.2.0 * * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state * - * @param string $tag_name The lowercase tag name which will close the RCDATA region. + * @param string $tag_name The uppercase tag name which will close the RCDATA region. * @return bool Whether an end to the RCDATA region was found before the end of the document. 
*/ private function skip_rcdata( $tag_name ) { diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 659ffb848e..31430d07a9 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1871,6 +1871,43 @@ HTML; ); } + /** + * @ticket 59292 + * + * @covers WP_HTML_Tag_Processor::next_tag + * + * @dataProvider data_next_tag_ignores_contents_of_rawtext_tags + * + * @param string $rawtext_element_then_target_node HTML starting with a RAWTEXT-specifying element such as STYLE, + * then an element afterward containing the "target" attribute. + */ + public function test_next_tag_ignores_contents_of_rawtext_tags( $rawtext_element_then_target_node ) { + $processor = new WP_HTML_Tag_Processor( $rawtext_element_then_target_node ); + $processor->next_tag(); + + $processor->next_tag(); + $this->assertNotNull( + $processor->get_attribute( 'target' ), + "Expected to find element with target attribute but found {$processor->get_tag()} instead." + ); + } + + /** + * Data provider: HTML in which a RAWTEXT element precedes the element carrying the "target" attribute. + * + * @return array[] Named datasets, each holding a single HTML string. + */ + public function data_next_tag_ignores_contents_of_rawtext_tags() { + return array( + 'IFRAME' => array( '
' ), + 'NOEMBED' => array( '<p></p>
' ), + 'NOFRAMES' => array( '<p>Check the rules here.</p>
' ), + 'NOSCRIPT' => array( '

' ), + 'STYLE' => array( '

' ), + 'STYLE hiding DIV' => array( '
' ), + ); + } + /** * Ensures that the invalid comment closing syntax "--!>" properly closes a comment. *