HTML API: Add class name utilities has_class() and class_list().

This patch adds two new public methods to the HTML Tag Processor:
 - `has_class()` indicates if a matched tag contains a given CSS class name.
 - `class_list()` returns a generator to iterate over all the class names in a matched tag.

Included in this patch is a refactoring of the internal logic when matching
a tag to reuse the new `has_class()` function. Previously, tag matching relied
on optimized code in the `matches()` function, which performed byte-for-byte
class name comparison. With the change in this patch, it performs class
name matching on the decoded value, which might differ if a class attribute
contains character references.

These methods may be useful for running more complicated queries based
on the presence or absence of CSS class names. The use of these methods
avoids the need to manually decode the class attribute as reported by
`$processor->get_attribute( 'class' )`.

Props dmsnell.
Fixes #59209.

git-svn-id: https://develop.svn.wordpress.org/trunk@56703 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Bernie Reiter
2023-09-26 09:15:21 +00:00
parent 086010a380
commit cecc810f91
2 changed files with 244 additions and 58 deletions

View File

@@ -498,6 +498,17 @@ class Tests_HtmlApi_WpHtmlTagProcessor extends WP_UnitTestCase {
$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
}
/**
 * Ensures that a `class_name` query given in plain text finds a tag whose
 * `class` attribute spells the same name using character references.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::next_tag
 */
public function test_next_tag_matches_decoded_class_names() {
	$processor = new WP_HTML_Tag_Processor( '<div class="&lt;egg&gt;">' );

	// The query uses the decoded form of the class name written in the HTML above.
	$query     = array( 'class_name' => '<egg>' );
	$was_found = $processor->next_tag( $query );

	$this->assertTrue( $was_found, 'Failed to find tag with HTML-encoded class name.' );
}
/**
* @ticket 56299
* @ticket 57852
@@ -1957,6 +1968,150 @@ HTML;
);
}
/**
 * Ensures that `class_list()` yields no class names for a tag that has no
 * `class` attribute at all.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::class_list
 */
public function test_class_list_empty_when_missing_class() {
	$p = new WP_HTML_Tag_Processor( '<div>' );

	// Confirm a tag is matched so that an empty class list reflects the tag itself rather than a failed match.
	$this->assertTrue( $p->next_tag(), 'Failed to find the DIV tag: check test setup.' );

	$found_classes = false;
	foreach ( $p->class_list() as $class ) {
		// Any iteration at all means a class name was reported.
		$found_classes = true;
	}

	$this->assertFalse( $found_classes, 'Found classes when none exist.' );
}
/**
 * Ensures that `class_list()` yields no class names when the `class`
 * attribute is present without a value (`<div class>`).
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::class_list
 */
public function test_class_list_empty_when_class_is_boolean() {
	$p = new WP_HTML_Tag_Processor( '<div class>' );

	// Confirm a tag is matched so that an empty class list reflects the tag itself rather than a failed match.
	$this->assertTrue( $p->next_tag(), 'Failed to find the DIV tag: check test setup.' );

	$found_classes = false;
	foreach ( $p->class_list() as $class ) {
		// Any iteration at all means a class name was reported.
		$found_classes = true;
	}

	$this->assertFalse( $found_classes, 'Found classes when none exist.' );
}
/**
 * Ensures that `class_list()` yields no class names when the `class`
 * attribute is present but its value is the empty string.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::class_list
 */
public function test_class_list_empty_when_class_is_empty() {
	$p = new WP_HTML_Tag_Processor( '<div class="">' );

	// Confirm a tag is matched so that an empty class list reflects the tag itself rather than a failed match.
	$this->assertTrue( $p->next_tag(), 'Failed to find the DIV tag: check test setup.' );

	$found_classes = false;
	foreach ( $p->class_list() as $class ) {
		// Any iteration at all means a class name was reported.
		$found_classes = true;
	}

	$this->assertFalse( $found_classes, 'Found classes when none exist.' );
}
/**
 * Ensures that iterating `class_list()` reports every class name in the
 * same order in which the names appear in the `class` attribute.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::class_list
 */
public function test_class_list_visits_each_class_in_order() {
	$processor = new WP_HTML_Tag_Processor( '<div class="one two three">' );
	$processor->next_tag();

	// Collect the yielded names into a plain list, discarding the iterator's keys.
	$visited = iterator_to_array( $processor->class_list(), false );

	$this->assertSame(
		array( 'one', 'two', 'three' ),
		$visited,
		'Failed to visit the class names in their original order.'
	);
}
/**
 * Ensures that `class_list()` reports class names with their character
 * references decoded, covering named and hexadecimal numeric references.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::class_list
 */
public function test_class_list_decodes_class_names() {
	$processor = new WP_HTML_Tag_Processor( '<div class="&notin;-class &lt;egg&gt; &#xff03;">' );
	$processor->next_tag();

	// Collect the yielded names into a plain list, discarding the iterator's keys.
	$visited  = iterator_to_array( $processor->class_list(), false );
	$expected = array( '∉-class', '<egg>', "\u{ff03}" );

	$this->assertSame( $expected, $visited, 'Failed to report class names in their decoded form.' );
}
/**
 * Ensures that `class_list()` reports a repeated class name only once,
 * including when one repetition is written with a character reference.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::class_list
 */
public function test_class_list_visits_unique_class_names_only_once() {
	// `&#x6f;ne` is `one` with its first letter written as a numeric character reference.
	$processor = new WP_HTML_Tag_Processor( '<div class="one one &#x6f;ne">' );
	$processor->next_tag();

	// Collect the yielded names into a plain list, discarding the iterator's keys.
	$visited = iterator_to_array( $processor->class_list(), false );

	$this->assertSame(
		array( 'one' ),
		$visited,
		'Visited multiple copies of the same class name when it should have skipped the duplicates.'
	);
}
/**
 * Ensures that `has_class()` reports the presence or absence of a class
 * name across a range of `class` attribute forms.
 *
 * @ticket 59209
 *
 * @covers WP_HTML_Tag_Processor::has_class
 *
 * @dataProvider data_html_with_variations_of_class_values_and_sought_class_names
 *
 * @param string $html         Contains a tag optionally containing a `class` attribute.
 * @param string $sought_class Name of class to find in the input tag's `class`.
 * @param bool   $has_class    Whether the sought class exists in the given HTML.
 */
public function test_has_class_handles_expected_class_name_variations( $html, $sought_class, $has_class ) {
	$processor = new WP_HTML_Tag_Processor( $html );
	$processor->next_tag();

	$was_found = $processor->has_class( $sought_class );

	// Datasets expecting absence are checked first; everything else must report a match.
	if ( ! $has_class ) {
		$this->assertFalse( $was_found, "Found class {$sought_class} when it doesn't exist." );
		return;
	}

	$this->assertTrue( $was_found, "Failed to find expected class {$sought_class}." );
}
/**
 * Data provider.
 *
 * Each dataset holds, in order: the HTML input, the class name to look for,
 * and whether that class name is expected to be found on the first tag.
 *
 * @return array[]
 */
public function data_html_with_variations_of_class_values_and_sought_class_names() {
	$datasets = array();

	// Tags with no usable class value.
	$datasets['Tag without any classes'] = array( '<div>', 'foo', false );
	$datasets['Tag with boolean class']  = array( '<img class>', 'foo', false );
	$datasets['Tag with empty class']    = array( '<p class="">', 'foo', false );

	// Tags where the sought class name is present.
	$datasets['Tag with exact match']       = array( '<button class="foo">', 'foo', true );
	$datasets['Tag with duplicate matches'] = array( '<span class="foo bar foo">', 'foo', true );
	$datasets['Tag with non-initial match'] = array( '<section class="bar foo">', 'foo', true );
	$datasets['Tag with encoded match']     = array( '<main class="&hellip;">', '…', true );

	// Class names separated by whitespace other than a plain space.
	$datasets['Class with tab separator']     = array( "<div class='one\ttwo'>", 'two', true );
	$datasets['Class with newline separator'] = array( "<div class='one\ntwo\n'>", 'two', true );

	// A name appearing only in a second `class` attribute is expected not to match.
	$datasets['False duplicate attribute'] = array( '<img class=dog class=cat>', 'cat', false );

	return $datasets;
}
/**
* Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
*