From 3d06660c636430d9b256c660062756d9142e008d Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Fri, 17 Nov 2023 06:11:31 +0000 Subject: [PATCH] HTML API: Add support for container elements, including ARTICLE. There are a handful of elements which behave similarly and are generically container elements. These are the following elements: ADDRESS, ARTICLE, ASIDE, BLOCKQUOTE, CENTER, DETAILS, DIALOG, DIR, DL, DIV, FIELDSET, FIGCAPTION, FIGURE, FOOTER, HEADER, HGROUP, MAIN, MENU, NAV, SEARCH, SECTION, SUMMARY. This patch adds support to the HTML Processor for handling these elements. They do not require any additional logic in the rest of the class, and carry no specific semantic rules for parsing beyond what is listed in their group in the IN BODY section of the HTML5 specification. Props dmsnell. Fixes #59914. git-svn-id: https://develop.svn.wordpress.org/trunk@57115 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 48 +++++++- .../html-api/wpHtmlProcessorBreadcrumbs.php | 36 +++--- .../html-api/wpHtmlProcessorSemanticRules.php | 107 +++++++++++++++++- 3 files changed, 167 insertions(+), 24 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index b8e1093054..779c7afec3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -99,12 +99,16 @@ * * The following list specifies the HTML tags that _are_ supported: * + * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. + * - Form elements: BUTTON, FIELDSET, SEARCH. + * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. + * - Heading elements: HGROUP. * - Links: A. - * - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. - * - Containers: DIV, FIGCAPTION, FIGURE, SPAN. - * - Form elements: BUTTON. + * - Lists: DL. 
+ * - Media elements: FIGCAPTION, FIGURE, IMG. * - Paragraph: P. - * - Void elements: IMG. + * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION. + * - Deprecated elements: CENTER, DIR. * * ### Supported markup * @@ -621,11 +625,29 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup", * > "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul" */ + case '+ADDRESS': + case '+ARTICLE': + case '+ASIDE': case '+BLOCKQUOTE': + case '+CENTER': + case '+DETAILS': + case '+DIALOG': + case '+DIR': case '+DIV': + case '+DL': + case '+FIELDSET': case '+FIGCAPTION': case '+FIGURE': + case '+FOOTER': + case '+HEADER': + case '+HGROUP': + case '+MAIN': + case '+MENU': + case '+NAV': case '+P': + case '+SEARCH': + case '+SECTION': + case '+SUMMARY': if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } @@ -639,11 +661,29 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" */ + case '-ADDRESS': + case '-ARTICLE': + case '-ASIDE': case '-BLOCKQUOTE': case '-BUTTON': + case '-CENTER': + case '-DETAILS': + case '-DIALOG': + case '-DIR': case '-DIV': + case '-DL': + case '-FIELDSET': case '-FIGCAPTION': case '-FIGURE': + case '-FOOTER': + case '-HEADER': + case '-HGROUP': + case '-MAIN': + case '-MENU': + case '-NAV': + case '-SEARCH': + case '-SECTION': + case '-SUMMARY': if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) { // @TODO: Report parse error. // Ignore the token. 
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index d454ab4842..b5e006c69f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -37,22 +37,40 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { public function data_single_tag_of_supported_elements() { $supported_elements = array( 'A', + 'ADDRESS', + 'ARTICLE', + 'ASIDE', 'B', 'BIG', 'BUTTON', + 'CENTER', // Neutralized 'CODE', + 'DETAILS', + 'DIALOG', + 'DIR', 'DIV', + 'DL', 'EM', + 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FONT', + 'FOOTER', + 'HEADER', + 'HGROUP', 'I', 'IMG', + 'MAIN', + 'MENU', + 'NAV', 'P', + 'SEARCH', + 'SECTION', 'SMALL', 'SPAN', 'STRIKE', 'STRONG', + 'SUMMARY', 'TT', 'U', ); @@ -99,11 +117,8 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { $unsupported_elements = array( 'ABBR', 'ACRONYM', // Neutralized - 'ADDRESS', 'APPLET', // Deprecated 'AREA', - 'ARTICLE', - 'ASIDE', 'AUDIO', 'BASE', 'BDI', @@ -114,7 +129,6 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'BR', 'CANVAS', 'CAPTION', - 'CENTER', // Neutralized 'CITE', 'COL', 'COLGROUP', @@ -122,14 +136,9 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'DATALIST', 'DD', 'DEL', - 'DETAILS', 'DEFN', - 'DIALOG', - 'DL', 'DT', 'EMBED', - 'FIELDSET', - 'FOOTER', 'FORM', 'FRAME', 'FRAMESET', @@ -140,8 +149,6 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'H5', 'H6', 'HEAD', - 'HEADER', - 'HGROUP', 'HR', 'HTML', 'IFRAME', @@ -155,16 +162,13 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'LI', 'LINK', 'LISTING', // Deprecated, use PRE instead. 
- 'MAIN', 'MAP', 'MARK', 'MARQUEE', // Deprecated 'MATH', - 'MENU', 'META', 'METER', 'MULTICOL', // Deprecated - 'NAV', 'NEXTID', // Deprecated 'NOBR', // Neutralized 'NOEMBED', // Neutralized @@ -187,14 +191,12 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { 'RUBY', 'SAMP', 'SCRIPT', - 'SECTION', 'SELECT', 'SLOT', 'SOURCE', 'SPACER', // Deprecated 'STYLE', 'SUB', - 'SUMMARY', 'SUP', 'SVG', 'TABLE', @@ -348,6 +350,8 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase { array( 'HTML', 'BODY', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'STRONG', 'EM', 'CODE' ), 2, ), + 'MAIN inside MAIN inside SPAN' => array( '
<span><main><main target>', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ), + 'MAIN next to unclosed P' => array( '<p><main target>
', array( 'HTML', 'BODY', 'MAIN' ), 1 ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index 01bb41ba84..cb351eed61 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -16,6 +16,105 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * RULES FOR "IN BODY" MODE *******************************************************************/ + /** + * Verifies that tags in the container group, including the ARTICLE element, + * close out an open P element if one exists. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket 59914 + * + * @dataProvider data_article_container_group + * + * @param string $tag_name Name of tag in group under test. + */ + public function test_in_body_article_group_closes_open_p_element( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name} target>" ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $this->assertEquals( + $tag_name, + $processor->get_tag(), + "Expected to find {$tag_name} but found {$processor->get_tag()} instead." + ); + + $this->assertSame( + array( 'HTML', 'BODY', $tag_name ), + $processor->get_breadcrumbs(), + "Expected to find {$tag_name} as direct child of BODY as a result of implicitly closing an open P element." + ); + } + + /** + * Verifies that tags in the container group, including the ARTICLE element, + * nest inside each other despite being invalid in most cases. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket 59914 + * + * @dataProvider data_article_container_group + * + * @param string $tag_name Name of tag in group under test. + */ + public function test_in_body_article_group_can_nest_inside_itself( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name}><{$tag_name}><{$tag_name}><{$tag_name} target>" ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', $tag_name, $tag_name, 'SPAN', $tag_name ), + $processor->get_breadcrumbs(), + "Expected to find {$tag_name} deeply nested inside itself." + ); + } + + /** + * Data provider. + * + * @return array[]. + */ + public function data_article_container_group() { + $group = array(); + + foreach ( + array( + 'ADDRESS', + 'ARTICLE', + 'ASIDE', + 'BLOCKQUOTE', + 'CENTER', + 'DETAILS', + 'DIALOG', + 'DIR', + 'DL', + 'DIV', + 'FIELDSET', + 'FIGCAPTION', + 'FIGURE', + 'FOOTER', + 'HEADER', + 'HGROUP', + 'MAIN', + 'MENU', + 'NAV', + 'SEARCH', + 'SECTION', + 'SUMMARY', + ) + as $tag_name + ) { + $group[ $tag_name ] = array( $tag_name ); + } + + return $group; + } + /** * Verifies that when encountering an end tag for which there is no corresponding * element in scope, that it skips the tag entirely. @@ -142,11 +241,11 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * that the HTML processor ignores the end tag if there's a special * element on the stack of open elements before the matching opening. * + * @covers WP_HTML_Processor::step_in_body + * * @ticket 58907 * * @since 6.4.0 - * - * @covers WP_HTML_Processor::step_in_body */ public function test_in_body_any_other_end_tag_with_unclosed_special_element() { $p = WP_HTML_Processor::create_fragment( '

' ); @@ -165,11 +264,11 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * that the HTML processor closes appropriate elements on the stack of * open elements up to the matching opening. * + * @covers WP_HTML_Processor::step_in_body + * * @ticket 58907 * * @since 6.4.0 - * - * @covers WP_HTML_Processor::step_in_body */ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element() { $p = WP_HTML_Processor::create_fragment( '
' );