From 32dd59bb9a1b72bfc04c1dd5397a5542a81ec73c Mon Sep 17 00:00:00 2001 From: bernhard-reiter Date: Wed, 13 Dec 2023 17:51:42 +0000 Subject: [PATCH] HTML API: Add support for H1-H6 elements in the HTML Processor. Previously these have been unsupported, but in this patch, support is added for the tags so that the HTML Processor can process documents containing them. There was a design discussion about introducing a constant to communicate "any of the H1 - H6 elements" but this posed a number of challenges that don't need to be answered in this patch. For the time being, because the HTML specification treats H1 - H6 specially as a single kind of element, the HTML Processor uses an internal hard-coded string to indicate this. By using a hard-coded string it's possible to avoid introducing a class constant which cannot be made private due to PHP's class design. In the future, this will probably appear as a special constant in a new constant-containing class. Props dmsnell, jonsurrell. Fixes #60060. 
git-svn-id: https://develop.svn.wordpress.org/trunk@57186 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-open-elements.php | 14 ++ .../html-api/class-wp-html-processor.php | 56 +++++++- .../tests/html-api/wpHtmlProcessor.php | 2 - .../html-api/wpHtmlProcessorBreadcrumbs.php | 54 ++++---- .../html-api/wpHtmlProcessorSemanticRules.php | 16 +-- ...lProcessorSemanticRulesHeadingElements.php | 126 ++++++++++++++++++ 6 files changed, 228 insertions(+), 40 deletions(-) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesHeadingElements.php diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index fe5625545b..55c4d3a663 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -116,6 +116,13 @@ class WP_HTML_Open_Elements { return true; } + if ( + '(internal: H1 through H6 - do not use)' === $tag_name && + in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + ) { + return true; + } + switch ( $node->node_name ) { case 'HTML': return false; @@ -270,6 +277,13 @@ class WP_HTML_Open_Elements { foreach ( $this->walk_up() as $item ) { $this->pop(); + if ( + '(internal: H1 through H6 - do not use)' === $tag_name && + in_array( $item->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + ) { + return true; + } + if ( $tag_name === $item->node_name ) { return true; } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 779c7afec3..14dfc3aa86 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -102,7 +102,7 @@ * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. * - Form elements: BUTTON, FIELDSET, SEARCH. 
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. - * - Heading elements: HGROUP. + * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. * - Links: A. * - Lists: DL. * - Media elements: FIGCAPTION, FIGURE, IMG. @@ -697,6 +697,60 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->state->stack_of_open_elements->pop_until( $tag_name ); return true; + /* + * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" + */ + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->close_a_p_element(); + } + + if ( + in_array( + $this->state->stack_of_open_elements->current_node()->node_name, + array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), + true + ) + ) { + // @TODO: Indicate a parse error once it's possible. + $this->state->stack_of_open_elements->pop(); + } + + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" + */ + case '-H1': + case '-H2': + case '-H3': + case '-H4': + case '-H5': + case '-H6': + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { + /* + * This is a parse error; ignore the token. + * + * @TODO: Indicate a parse error once it's possible. + */ + return $this->step(); + } + + $this->generate_implied_end_tags(); + + if ( $this->state->stack_of_open_elements->current_node()->node_name !== $tag_name ) { + // @TODO: Record parse error: this error doesn't impact parsing. 
+ } + + $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); + return true; + /* * > An end tag whose tag name is "p" */ diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 37e3aa5de8..a9af5d790f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -91,8 +91,6 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase { * * @covers WP_HTML_Processor::next_tag * @covers WP_HTML_Processor::seek - * - * @throws WP_HTML_Unsupported_Exception */ public function test_clear_to_navigate_after_seeking() { $p = WP_HTML_Processor::create_fragment( '

' ); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index b5e006c69f..2fd852e434 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -56,6 +56,12 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'FIGURE', 'FONT', 'FOOTER', + 'H1', + 'H2', + 'H3', + 'H4', + 'H5', + 'H6', 'HEADER', 'HGROUP', 'I', @@ -142,12 +148,6 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'FORM', 'FRAME', 'FRAMESET', - 'H1', - 'H2', - 'H3', - 'H4', - 'H5', - 'H6', 'HEAD', 'HR', 'HTML', @@ -352,6 +352,14 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { ), 'MAIN inside MAIN inside SPAN' => array( '
', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ), 'MAIN next to unclosed P' => array( '

', array( 'HTML', 'BODY', 'MAIN' ), 1 ), + + // H1 - H6 close out _any_ H1 - H6 when encountering _any_ of H1 - H6, making this section surprising. + 'EM inside H3 after unclosed P' => array( '

Important Message

', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ), + 'H4 after H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H4' ), 1 ), + 'H4 after unclosed H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H4' ), 1 ), + 'H4 inside H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H4' ), 1 ), + 'H5 after unclosed H4 inside H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H5' ), 1 ), + 'H5 after H4 inside H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H5' ), 1 ), ); } @@ -387,29 +395,29 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { public function data_html_with_breadcrumbs_of_various_specificity() { return array( // Test with void elements. - 'Inner IMG' => array( '
', array( 'span', 'figure', 'img' ), true ), - 'Inner IMG wildcard' => array( '
', array( 'span', '*', 'img' ), true ), - 'Inner IMG no wildcard' => array( '
', array( 'span', 'img' ), false ), - 'Full specification' => array( '
', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ), - 'Invalid Full specification' => array( '
', array( 'html', 'div', 'span', 'figure', 'img' ), false ), + 'Inner IMG' => array( '
', array( 'span', 'figure', 'img' ), true ), + 'Inner IMG wildcard' => array( '
', array( 'span', '*', 'img' ), true ), + 'Inner IMG no wildcard' => array( '
', array( 'span', 'img' ), false ), + 'Full specification' => array( '
', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ), + 'Invalid Full specification' => array( '
', array( 'html', 'div', 'span', 'figure', 'img' ), false ), // Test also with non-void elements that open and close. - 'Inner P' => array( '

', array( 'span', 'figure', 'p' ), true ), - 'Inner P wildcard' => array( '

', array( 'span', '*', 'p' ), true ), - 'Inner P no wildcard' => array( '

', array( 'span', 'p' ), false ), - 'Full specification (P)' => array( '

', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ), - 'Invalid Full specification (P)' => array( '

', array( 'html', 'div', 'span', 'figure', 'p' ), false ), + 'Inner P' => array( '

', array( 'span', 'figure', 'p' ), true ), + 'Inner P wildcard' => array( '

', array( 'span', '*', 'p' ), true ), + 'Inner P no wildcard' => array( '

', array( 'span', 'p' ), false ), + 'Full specification (P)' => array( '

', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ), + 'Invalid Full specification (P)' => array( '

', array( 'html', 'div', 'span', 'figure', 'p' ), false ), // Ensure that matches aren't on tag closers. - 'Inner P' => array( '

', array( 'span', 'figure', 'p' ), false ), - 'Inner P wildcard' => array( '

', array( 'span', '*', 'p' ), false ), - 'Inner P no wildcard' => array( '

', array( 'span', 'p' ), false ), - 'Full specification (P)' => array( '

', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ), - 'Invalid Full specification (P)' => array( '

', array( 'html', 'div', 'span', 'figure', 'p' ), false ), + 'Inner P (Closer)' => array( '

', array( 'span', 'figure', 'p' ), false ), + 'Inner P wildcard (Closer)' => array( '

', array( 'span', '*', 'p' ), false ), + 'Inner P no wildcard (Closer)' => array( '

', array( 'span', 'p' ), false ), + 'Full specification (P) (Closer)' => array( '

', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ), + 'Invalid Full specification (P) (Closer)' => array( '

', array( 'html', 'div', 'span', 'figure', 'p' ), false ), // Test wildcard behaviors. - 'Single wildcard element' => array( '

', array( '*' ), true ), - 'Child of wildcard element' => array( '

', array( 'SPAN', '*' ), true ), + 'Single wildcard element' => array( '

', array( '*' ), true ), + 'Child of wildcard element' => array( '

', array( 'SPAN', '*' ), true ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index cb351eed61..7bd243d8dc 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -120,10 +120,6 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * element in scope, that it skips the tag entirely. * * @ticket 58961 - * - * @since 6.4.0 - * - * @throws Exception */ public function test_in_body_skips_unexpected_button_closer() { $p = WP_HTML_Processor::create_fragment( '
Test
' ); @@ -145,10 +141,6 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * Verifies insertion of a BUTTON element when no existing BUTTON is already in scope. * * @ticket 58961 - * - * @since 6.4.0 - * - * @throws WP_HTML_Unsupported_Exception */ public function test_in_body_button_with_no_button_in_scope() { $p = WP_HTML_Processor::create_fragment( '

Click the button !

' ); @@ -174,8 +166,6 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * @ticket 58961 * * @since 6.4.0 - * - * @throws WP_HTML_Unsupported_Exception */ public function test_in_body_button_with_button_in_scope_as_parent() { $p = WP_HTML_Processor::create_fragment( '

Click the button !

' ); @@ -209,8 +199,6 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * @ticket 58961 * * @since 6.4.0 - * - * @throws WP_HTML_Unsupported_Exception */ public function test_in_body_button_with_button_in_scope_as_ancestor() { $p = WP_HTML_Processor::create_fragment( '
!

' ); @@ -236,7 +224,7 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { $this->assertSame( array( 'HTML', 'BODY', 'BUTTON' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting for third button.' ); } - /* + /** * Verifies that when "in body" and encountering "any other end tag" * that the HTML processor ignores the end tag if there's a special * element on the stack of open elements before the matching opening. @@ -259,7 +247,7 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should still be open and DIV should be its child.' ); } - /* + /** * Verifies that when "in body" and encountering "any other end tag" * that the HTML processor closes appropriate elements on the stack of * open elements up to the matching opening. diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesHeadingElements.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesHeadingElements.php new file mode 100644 index 0000000000..d8d70acb61 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesHeadingElements.php @@ -0,0 +1,126 @@ +Open<{$tag_name}>Closed P

"
+		);
+
+		$processor->next_tag( $tag_name );
+		$this->assertSame(
+			array( 'HTML', 'BODY', $tag_name ),
+			$processor->get_breadcrumbs(),
+			"Expected {$tag_name} to be a direct child of the BODY, having closed the open P element."
+		);
+
+		$processor->next_tag( 'IMG' );
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'IMG' ),
+			$processor->get_breadcrumbs(),
+			'Expected IMG to be a direct child of BODY, having closed the open P element.'
+		);
+	}
+
+	/**
+	 * Data provider supplying each heading tag name, H1 through H6, as its own dataset.
+	 *
+	 * Every dataset key matches the tag name it supplies so that a failing
+	 * dataset identifies the heading element that was actually under test.
+	 *
+	 * @return array[]
+	 */
+	public function data_heading_elements() {
+		return array(
+			'H1' => array( 'H1' ),
+			'H2' => array( 'H2' ),
+			'H3' => array( 'H3' ),
+			'H4' => array( 'H4' ),
+			'H5' => array( 'H5' ),
+			'H6' => array( 'H6' ),
+		);
+	}
+
+	/**
+	 * Verifies that H1 through H6 elements close an open H1 through H6 element.
+	 *
+	 * @ticket 60060
+	 *
+	 * @covers WP_HTML_Processor::step
+	 *
+	 * @dataProvider data_heading_combinations
+	 *
+	 * @param string $first_heading  H1 - H6 element appearing (unclosed) before the second.
+	 * @param string $second_heading H1 - H6 element appearing after the first.
+	 */
+	public function test_in_body_heading_element_closes_other_heading_elements( $first_heading, $second_heading ) {
+		$processor = WP_HTML_Processor::create_fragment(
+			"
<{$first_heading} first> then <{$second_heading} second> and end
"
+		);
+
+		while ( $processor->next_tag() && null === $processor->get_attribute( 'second' ) ) {
+			continue;
+		}
+
+		$this->assertTrue(
+			$processor->get_attribute( 'second' ),
+			"Failed to find expected {$second_heading} tag."
+		);
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'DIV', $second_heading ),
+			$processor->get_breadcrumbs(),
+			"Expected {$second_heading} to be a direct child of the DIV, having closed the open {$first_heading} element."
+		);
+
+		$processor->next_tag( 'IMG' );
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'DIV', 'IMG' ),
+			$processor->get_breadcrumbs(),
+			"Expected IMG to be a direct child of DIV, having closed the open {$first_heading} element."
+		);
+	}
+
+	/**
+	 * Data provider supplying every ordered pairing of H1 - H6 tag names.
+	 *
+	 * Datasets are keyed as "{first} then {second}" and each holds the
+	 * first and second heading tag names, in that order.
+	 *
+	 * @return array[]
+	 */
+	public function data_heading_combinations() {
+		$headings = array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' );
+
+		$combinations = array();
+
+		// Create every ordered pairing of H1 - H6 elements, including a heading followed by itself.
+		foreach ( $headings as $first_tag ) {
+			foreach ( $headings as $second_tag ) {
+				$combinations[ "{$first_tag} then {$second_tag}" ] = array( $first_tag, $second_tag );
+			}
+		}
+
+		return $combinations;
+	}
+}