diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 541b5baf0b..79d96dc8be 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -242,6 +242,8 @@
* unquoted values will appear in the output with double-quotes.
*
* @since 6.2.0
+ * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
+ * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
*/
class WP_HTML_Tag_Processor {
/**
@@ -568,7 +570,14 @@ class WP_HTML_Tag_Processor {
* of the tag name as a pre-check avoids a string allocation when it's not needed.
*/
$t = $this->html[ $this->tag_name_starts_at ];
- if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
+ if (
+ ! $this->is_closing_tag &&
+ (
+ 'i' === $t || 'I' === $t ||
+ 'n' === $t || 'N' === $t ||
+ 's' === $t || 'S' === $t ||
+ 't' === $t || 'T' === $t
+ ) ) {
$tag_name = $this->get_tag();
if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
@@ -580,6 +589,25 @@ class WP_HTML_Tag_Processor {
) {
$this->bytes_already_parsed = strlen( $this->html );
return false;
+ } elseif (
+ (
+ 'IFRAME' === $tag_name ||
+ 'NOEMBED' === $tag_name ||
+ 'NOFRAMES' === $tag_name ||
+ 'NOSCRIPT' === $tag_name ||
+ 'STYLE' === $tag_name
+ ) &&
+ ! $this->skip_rawtext( $tag_name )
+ ) {
+ /*
+ * "XMP" should be here too but its rules are more complicated and require the
+ * complexity of the HTML Processor (it needs to close out any open P element,
+ * meaning it can't be skipped here or else the HTML Processor will lose its
+ * place). For now, it can be ignored as it's a rare HTML tag in practice and
+ * any normative HTML should be using PRE instead.
+ */
+ $this->bytes_already_parsed = strlen( $this->html );
+ return false;
}
}
} while ( $already_found < $this->sought_match_offset );
@@ -710,15 +738,33 @@ class WP_HTML_Tag_Processor {
return true;
}
+ /**
+ * Skips contents of generic rawtext elements.
+ *
+ * @since 6.3.2
+ *
+ * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
+ *
+ * @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
+ * @return bool Whether an end to the RAWTEXT region was found before the end of the document.
+ */
+ private function skip_rawtext( $tag_name ) {
+ /*
+ * These two functions distinguish themselves on whether character references are
+ * decoded, and since functionality to read the inner markup isn't supported, it's
+ * not necessary to implement these two functions separately.
+ */
+ return $this->skip_rcdata( $tag_name );
+ }
/**
- * Skips contents of title and textarea tags.
+ * Skips contents of RCDATA elements, namely title and textarea tags.
*
* @since 6.2.0
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
*
- * @param string $tag_name The lowercase tag name which will close the RCDATA region.
+ * @param string $tag_name The uppercase tag name which will close the RCDATA region.
* @return bool Whether an end to the RCDATA region was found before the end of the document.
*/
private function skip_rcdata( $tag_name ) {
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 659ffb848e..31430d07a9 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -1871,6 +1871,43 @@ HTML;
);
}
+ /**
+ * @ticket 59292
+ *
+ * @covers WP_HTML_Tag_Processor::next_tag
+ *
+ * @dataProvider data_next_tag_ignores_contents_of_rawtext_tags
+ *
+ * @param string $rawtext_element_then_target_node HTML starting with a RAWTEXT-specifying element such as STYLE,
+ * then an element afterward containing the "target" attribute.
+ */
+ public function test_next_tag_ignores_contents_of_rawtext_tags( $rawtext_element_then_target_node ) {
+ $processor = new WP_HTML_Tag_Processor( $rawtext_element_then_target_node );
+ $processor->next_tag();
+
+ $processor->next_tag();
+ $this->assertNotNull(
+ $processor->get_attribute( 'target' ),
+ "Expected to find element with target attribute but found {$processor->get_tag()} instead."
+ );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array[].
+ */
+ public function data_next_tag_ignores_contents_of_rawtext_tags() {
+ return array(
+ 'IFRAME' => array( '' ),
+ 'NOEMBED' => array( '
' ),
+ 'NOFRAMES' => array( '
Check the rules here.
' ),
+ 'NOSCRIPT' => array( '
' ),
+ 'STYLE' => array( '
' ),
+ 'STYLE hiding DIV' => array( '
' ),
+ );
+ }
+
/**
* Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
*