Permalinks: Sanitize non-visible characters inside sanitize_title_with_dashes().

This change prevents non-visible characters in titles from creating encoded values in permalinks, opting instead for the following replacement strategy:

* Non-visible non-zero-width characters are replaced with hyphens
* Non-visible zero-width characters are removed entirely

Included with this change are 64 additional PHPUnit assertions to confirm that only the targeted non-visible characters are sanitized as intended.

Before this change, URLs would unintentionally contain percent-encoded values wherever a title included these non-visible characters. After this change, these characters are intentionally stripped out of URLs or replaced with hyphens.

Props costdev, dhanendran, hellofromtonya, paaljoachim, peterwilsoncc, poena, sergeybiryukov.

Fixes #47912.

git-svn-id: https://develop.svn.wordpress.org/trunk@51984 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
John James Jacoby 2021-11-02 18:46:36 +00:00
parent d7518d1927
commit 8f9eea80f9
2 changed files with 244 additions and 0 deletions

View File

@ -2288,11 +2288,45 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa
'%cc%80',
'%cc%84',
'%cc%8c',
// Non-visible characters that display without a width.
'%e2%80%8b',
'%e2%80%8c',
'%e2%80%8d',
'%e2%80%8e',
'%e2%80%8f',
'%e2%80%aa',
'%e2%80%ab',
'%e2%80%ac',
'%e2%80%ad',
'%e2%80%ae',
'%ef%bb%bf',
),
'',
$title
);
// Convert non-visible characters that display with a width to hyphen.
$title = str_replace(
array(
'%e2%80%80',
'%e2%80%81',
'%e2%80%82',
'%e2%80%83',
'%e2%80%84',
'%e2%80%85',
'%e2%80%86',
'%e2%80%87',
'%e2%80%88',
'%e2%80%89',
'%e2%80%8a',
'%e2%80%a8',
'%e2%80%a9',
'%e2%80%af',
),
'-',
$title
);
// Convert &times to 'x'.
$title = str_replace( '%c3%97', 'x', $title );
}

View File

@ -147,4 +147,214 @@ class Tests_Formatting_SanitizeTitleWithDashes extends WP_UnitTestCase {
$this->assertSame( 'aaaa', sanitize_title_with_dashes( 'ááa´aˊ', '', 'save' ) );
}
/**
 * Checks how titles containing zero-width non-visible characters are
 * sanitized in the 'save' context.
 *
 * @ticket 47912
 * @dataProvider data_removes_non_visible_characters_without_width
 *
 * @param string $title    Raw title handed to the sanitizer.
 * @param string $expected Optional. Slug the sanitizer should return. Default empty string.
 */
public function test_removes_non_visible_characters_without_width( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title, '', 'save' );

	$this->assertSame( $expected, $actual );
}
/**
 * Data provider: titles containing zero-width non-visible characters,
 * paired with the slug expected in the 'save' context.
 *
 * @return array[] Data sets keyed by a short description.
 */
public function data_removes_non_visible_characters_without_width() {
	$zero_width_octets = array(
		'%e2%80%8b',
		'%e2%80%8c',
		'%e2%80%8d',
		'%e2%80%8e',
		'%e2%80%8f',
		'%e2%80%aa',
		'%e2%80%ab',
		'%e2%80%ac',
		'%e2%80%ad',
		'%e2%80%ae',
		'%ef%bb%bf',
	);

	$datasets = array();

	// Titles consisting of nothing but one non-visible character. Only the
	// title is supplied, so the test method's default `$expected` applies.
	foreach ( $zero_width_octets as $octets ) {
		$datasets[ 'only ' . $octets ] = array( $octets );
	}

	// Non-visible characters embedded in otherwise ordinary titles.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %ef%bb%bfin middle of title',
		'expected' => 'nonvisible-in-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%8bNonvisible at start of title',
		'expected' => 'nonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%8b',
		'expected' => 'nonvisible-at-end-of-title',
	);
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%ef%bb%bf %e2%80%aerandomly %e2%80%8ein the %e2%80%8e title%e2%80%8e',
		'expected' => 'nonvisible-randomly-in-the-title',
	);

	return $datasets;
}
/**
 * Checks how titles containing zero-width non-visible characters are
 * sanitized when no context argument is passed to the sanitizer.
 *
 * @ticket 47912
 * @dataProvider data_non_visible_characters_without_width_when_not_save
 *
 * @param string $title    Raw title handed to the sanitizer.
 * @param string $expected Optional. Slug the sanitizer should return. Default empty string.
 */
public function test_non_visible_characters_without_width_when_not_save( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title );

	$this->assertSame( $expected, $actual );
}
/**
 * Data provider: titles containing zero-width non-visible characters,
 * paired with the slug expected when the sanitizer is called without a
 * context argument.
 *
 * @return array[] Data sets keyed by a short description.
 */
public function data_non_visible_characters_without_width_when_not_save() {
	$zero_width_octets = array(
		'%e2%80%8b',
		'%e2%80%8c',
		'%e2%80%8d',
		'%e2%80%8e',
		'%e2%80%8f',
		'%e2%80%aa',
		'%e2%80%ab',
		'%e2%80%ac',
		'%e2%80%ad',
		'%e2%80%ae',
		'%ef%bb%bf',
	);

	$datasets = array();

	// Titles consisting of nothing but one non-visible character; the
	// expected slug is the same encoded sequence as the title.
	foreach ( $zero_width_octets as $octets ) {
		$datasets[ 'only ' . $octets ] = array( $octets, $octets );
	}

	// Non-visible characters embedded in otherwise ordinary titles.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %ef%bb%bfin middle of title',
		'expected' => 'nonvisible-%ef%bb%bfin-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%8bNonvisible at start of title',
		'expected' => '%e2%80%8bnonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%8b',
		'expected' => 'nonvisible-at-end-of-title-%e2%80%8b',
	);
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%ef%bb%bf %e2%80%aerandomly %e2%80%8ein the %e2%80%8e title%e2%80%8e',
		'expected' => 'nonvisible%ef%bb%bf-%e2%80%aerandomly-%e2%80%8ein-the-%e2%80%8e-title%e2%80%8e',
	);

	return $datasets;
}
/**
 * Checks how titles containing non-visible characters that display with a
 * width are sanitized in the 'save' context.
 *
 * @ticket 47912
 * @dataProvider data_converts_non_visible_characters_with_width_to_hyphen
 *
 * @param string $title    Raw title handed to the sanitizer.
 * @param string $expected Optional. Slug the sanitizer should return. Default empty string.
 */
public function test_converts_non_visible_characters_with_width_to_hyphen( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title, '', 'save' );

	$this->assertSame( $expected, $actual );
}
/**
 * Data provider: titles containing non-visible characters that display
 * with a width, paired with the slug expected in the 'save' context.
 *
 * @return array[] Data sets keyed by a short description.
 */
public function data_converts_non_visible_characters_with_width_to_hyphen() {
	$with_width_octets = array(
		'%e2%80%80',
		'%e2%80%81',
		'%e2%80%82',
		'%e2%80%83',
		'%e2%80%84',
		'%e2%80%85',
		'%e2%80%86',
		'%e2%80%87',
		'%e2%80%88',
		'%e2%80%89',
		'%e2%80%8a',
		'%e2%80%a8',
		'%e2%80%a9',
		'%e2%80%af',
	);

	$datasets = array();

	// Titles consisting of nothing but one non-visible character. Only the
	// title is supplied, so the test method's default `$expected` applies.
	foreach ( $with_width_octets as $octets ) {
		$datasets[ 'only ' . $octets ] = array( $octets );
	}

	// Non-visible characters embedded in otherwise ordinary titles.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %e2%80%82 in middle of title',
		'expected' => 'nonvisible-in-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%83Nonvisible at start of title',
		'expected' => 'nonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%81',
		'expected' => 'nonvisible-at-end-of-title',
	);
	$datasets['two end of title']   = array(
		'title'    => 'Nonvisible at end of title %e2%80%81 %e2%80%af',
		'expected' => 'nonvisible-at-end-of-title',
	);
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%e2%80%80 %e2%80%a9randomly %e2%80%87in the %e2%80%a8 title%e2%80%af',
		'expected' => 'nonvisible-randomly-in-the-title',
	);

	return $datasets;
}
/**
 * Checks how titles containing non-visible characters are sanitized when
 * no context argument is passed to the sanitizer.
 *
 * @ticket 47912
 * @dataProvider data_non_visible_characters_with_width_to_hyphen_when_not_save
 *
 * @param string $title    Raw title handed to the sanitizer.
 * @param string $expected Optional. Slug the sanitizer should return. Default empty string.
 */
public function test_non_visible_characters_with_width_to_hyphen_when_not_save( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title );

	$this->assertSame( $expected, $actual );
}
/**
 * Data provider: titles containing non-visible characters that display
 * with a width, paired with the slug expected when the sanitizer is called
 * without a context argument.
 *
 * Every expected slug still contains the encoded sequences from its title.
 *
 * @return array[] Data sets keyed by a short description.
 */
public function data_non_visible_characters_with_width_to_hyphen_when_not_save() {
	return array(
		// Just the non-visible characters that display with a width.
		'only %e2%80%80'     => array( '%e2%80%80', '%e2%80%80' ),
		'only %e2%80%81'     => array( '%e2%80%81', '%e2%80%81' ),
		'only %e2%80%82'     => array( '%e2%80%82', '%e2%80%82' ),
		'only %e2%80%83'     => array( '%e2%80%83', '%e2%80%83' ),
		'only %e2%80%84'     => array( '%e2%80%84', '%e2%80%84' ),
		'only %e2%80%85'     => array( '%e2%80%85', '%e2%80%85' ),
		'only %e2%80%86'     => array( '%e2%80%86', '%e2%80%86' ),
		'only %e2%80%87'     => array( '%e2%80%87', '%e2%80%87' ),
		'only %e2%80%88'     => array( '%e2%80%88', '%e2%80%88' ),
		'only %e2%80%89'     => array( '%e2%80%89', '%e2%80%89' ),
		'only %e2%80%8a'     => array( '%e2%80%8a', '%e2%80%8a' ),
		'only %e2%80%a8'     => array( '%e2%80%a8', '%e2%80%a8' ),
		'only %e2%80%a9'     => array( '%e2%80%a9', '%e2%80%a9' ),
		'only %e2%80%af'     => array( '%e2%80%af', '%e2%80%af' ),
		// Non-visible characters within the title.
		'in middle of title' => array(
			'title'    => 'Nonvisible %e2%80%82 in middle of title',
			'expected' => 'nonvisible-%e2%80%82-in-middle-of-title',
		),
		'at start of title'  => array(
			'title'    => '%e2%80%83Nonvisible at start of title',
			'expected' => '%e2%80%83nonvisible-at-start-of-title',
		),
		'at end of title'    => array(
			'title'    => 'Nonvisible at end of title %e2%80%81',
			'expected' => 'nonvisible-at-end-of-title-%e2%80%81',
		),
		// Uses the same title as the 'save' context provider's set of the same name.
		'randomly in title'  => array(
			'title'    => 'Nonvisible%e2%80%80 %e2%80%a9randomly %e2%80%87in the %e2%80%a8 title%e2%80%af',
			'expected' => 'nonvisible%e2%80%80-%e2%80%a9randomly-%e2%80%87in-the-%e2%80%a8-title%e2%80%af',
		),
	);
}
}