HTML API: Fix void tag nesting with next_token

The introduction of `next_token()` caused a regression in the HTML Processor
whereby void tags remained on the stack of open elements when they shouldn't
have. This led to invalid values being returned from `get_breadcrumbs()`.

The reason is that calling `next_token()` works through a different code path
than the one the HTML Processor uses for everything else. To account for this,
the sub-classed `next_token()` called `step( self::REPROCESS_CURRENT_NODE )` so
that the proper HTML accounting takes place.

Unfortunately that same reprocessing code path skipped the step whereby void
and self-closing elements are popped from the stack of open elements.

In this patch, that step is run with a third mode for `step()`, which is the
new `self::PROCESS_CURRENT_NODE`. This mode acts as if `self::PROCESS_NEXT_NODE`
were passed, except it doesn't advance the parser.

Developed in https://github.com/WordPress/wordpress-develop/pull/5975
Discussed in https://core.trac.wordpress.org/ticket/60382

Follow-up to [57348]

Props dmsnell, jonsurrell
Fixes #60382



git-svn-id: https://develop.svn.wordpress.org/trunk@57507 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Dennis Snell 2024-02-01 00:41:40 +00:00
parent 4a2aa99d51
commit cdb218b200
2 changed files with 66 additions and 2 deletions

View File

@ -431,7 +431,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$found_a_token = parent::next_token();
if ( '#tag' === $this->get_token_type() ) {
$this->step( self::REPROCESS_CURRENT_NODE );
$this->step( self::PROCESS_CURRENT_NODE );
}
return $found_a_token;
@ -513,7 +513,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
return false;
}
if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
/*
* Void elements still hop onto the stack of open elements even though
* there's no corresponding closing tag. This is important for managing
@ -532,7 +532,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
if ( $top_node && self::is_void( $top_node->node_name ) ) {
$this->state->stack_of_open_elements->pop();
}
}
if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
while ( parent::next_token() && '#tag' !== $this->get_token_type() ) {
continue;
}
@ -1781,6 +1783,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
const REPROCESS_CURRENT_NODE = 'reprocess-current-node';
/**
* Indicates that the current HTML token should be processed without advancing the parser.
*
* When `step()` receives this mode, the void-element cleanup that is guarded by
* `self::REPROCESS_CURRENT_NODE !== $node_to_process` still runs, while the
* `parent::next_token()` scan that is guarded by
* `self::PROCESS_NEXT_NODE === $node_to_process` is skipped.
*
* @since 6.5.0
*
* @var string
*/
const PROCESS_CURRENT_NODE = 'process-current-node';
/**
* Indicates that the parser encountered unsupported markup and has bailed.
*

View File

@ -188,6 +188,59 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
);
}
/**
 * Verifies that a void tag reached via `next_token()` does not become the
 * parent of the element that follows it.
 *
 * @ticket 60382
 *
 * @dataProvider data_void_tags
 *
 * @param string $tag_name Name of void tag under test.
 */
public function test_cannot_nest_void_tags_next_token( $tag_name ) {
	/*
	 * Taking `<img>` as the example tag, the fragment below is
	 * equivalent to this document:
	 *
	 *     <html>
	 *         <body>
	 *             <img>
	 *             <div></div>
	 *         </body>
	 *     </html>
	 */
	$html = WP_HTML_Processor::create_fragment( "<{$tag_name}><div>" );

	// Step onto the void tag; bail out early when the processor can't handle it.
	$found_first    = $html->next_token();
	$is_unsupported = WP_HTML_Processor::ERROR_UNSUPPORTED === $html->get_last_error();
	if ( $is_unsupported ) {
		$this->markTestSkipped( "Tag {$tag_name} is not supported." );
	}

	$this->assertTrue( $found_first, "Could not find first {$tag_name}." );

	// While the void tag is the current token it sits directly under BODY.
	$void_tag_path = array( 'HTML', 'BODY', $tag_name );
	$this->assertSame(
		$void_tag_path,
		$html->get_breadcrumbs(),
		'Found incorrect nesting of first element.'
	);

	// Step onto the DIV; the void tag must no longer be in the breadcrumbs.
	$found_second = $html->next_token();
	$this->assertTrue( $found_second, 'Should have found the DIV as the second tag.' );

	$div_path = array( 'HTML', 'BODY', 'DIV' );
	$this->assertSame(
		$div_path,
		$html->get_breadcrumbs(),
		"DIV should have been a sibling of the {$tag_name}."
	);
}
/**
* Data provider.
*