HTML API: Fix CDATA lookalike matching invalid CDATA

When `next_token()` was introduced to the HTML Tag Processor, it started
classifying comments that look like they were intended to be CDATA sections.
In one of the changes made during development, however, a typo slipped
through code review that treated comments as CDATA even if they only
ended in `]>` and not the required `]]>`.

The consequences of this defect were minor because in all cases these are
treated as HTML comments from invalid syntax, but this patch adds the
missing check to ensure the proper reporting of CDATA-lookalikes.

Follow-up to [57348]

Props jonsurrell
Fixes #60406



git-svn-id: https://develop.svn.wordpress.org/trunk@57506 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Dennis Snell 2024-02-01 00:10:19 +00:00
parent 5e33f4be2d
commit 4a2aa99d51
2 changed files with 40 additions and 1 deletions

View File

@ -1762,7 +1762,8 @@ class WP_HTML_Tag_Processor {
'T' === $html[ $this->token_starts_at + 6 ] &&
'A' === $html[ $this->token_starts_at + 7 ] &&
'[' === $html[ $this->token_starts_at + 8 ] &&
']' === $html[ $closer_at - 1 ]
']' === $html[ $closer_at - 1 ] &&
']' === $html[ $closer_at - 2 ]
) {
$this->parser_state = self::STATE_COMMENT;
$this->comment_type = self::COMMENT_AS_CDATA_LOOKALIKE;

View File

@ -347,6 +347,38 @@ HTML
);
}
/**
* Ensures that a CDATA-lookalike closed by `]>` instead of `]]>` is reported
* as an invalid HTML comment and not as a CDATA-lookalike.
*
* @ticket 60406
*
* @since 6.5.0
*
* @covers WP_HTML_Tag_Processor::next_token
*/
public function test_cdata_comment_with_incorrect_closer() {
// The input opens like a CDATA section but ends in `]>` rather than `]]>`.
$processor = new WP_HTML_Tag_Processor( '<![CDATA[this is missing a closing square bracket]>' );
$processor->next_token();
$this->assertSame(
'#comment',
$processor->get_token_name(),
"Should have found comment token but found {$processor->get_token_name()} instead."
);
$this->assertSame(
WP_HTML_Processor::COMMENT_AS_INVALID_HTML,
$processor->get_comment_type(),
'Should have detected invalid HTML comment.'
);
// The expected text is everything between the opening `<!` and the final `>`.
$this->assertSame(
'[CDATA[this is missing a closing square bracket]',
$processor->get_modifiable_text(),
'Found incorrect modifiable text.'
);
}
/**
* Ensures that abruptly-closed CDATA sections are properly parsed as comments.
*
@ -366,6 +398,12 @@ HTML
"Should have found a bogus comment but found {$processor->get_token_name()} instead."
);
$this->assertSame(
WP_HTML_Processor::COMMENT_AS_INVALID_HTML,
$processor->get_comment_type(),
'Should have detected invalid HTML comment.'
);
$this->assertNull(
$processor->get_tag(),
'Should not have been able to query tag name on non-element token.'