mirror of
https://github.com/gosticks/wordpress-develop.git
synced 2026-02-27 03:02:53 +00:00
Database: Account for utf8 being renamed to utf8mb3 in newer MariaDB and MySQL versions.
From [https://mariadb.com/kb/en/mariadb-1061-release-notes/ MariaDB 10.6.1 release notes]:
> The `utf8` [https://mariadb.com/kb/en/character-sets/ character set] (and related collations) is now by default an alias for `utf8mb3` rather than the other way around. It can be set to imply `utf8mb4` by changing the value of the [https://mariadb.com/kb/en/server-system-variables/#old_mode old_mode] system variable ([https://jira.mariadb.org/browse/MDEV-8334 MDEV-8334]).

From [https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-30.html#mysqld-8-0-30-charset MySQL 8.0.30 release notes]:
> **Important Change:** A previous change renamed character sets having deprecated names prefixed with `utf8_` to use `utf8mb3_` instead. In this release, we rename the `utf8_` collations as well, using the `utf8mb3_` prefix; this is to make the collation names consistent with those of the character sets, not to rely any longer on the deprecated collation names, and to clarify the distinction between `utf8mb3` and `utf8mb4`. The names using the `utf8mb3_` prefix are now used exclusively for these collations in the output of `SHOW` statements such as `SHOW CREATE TABLE`, as well as in the values displayed in the columns of Information Schema tables including the `COLLATIONS` and `COLUMNS` tables.

This commit adds `utf8mb3_bin` and `utf8mb3_general_ci` to the list of safe collations recognized by `wpdb::check_safe_collation()`.

The full list is now as follows:
* `utf8_bin`
* `utf8_general_ci`
* `utf8mb3_bin`
* `utf8mb3_general_ci`
* `utf8mb4_bin`
* `utf8mb4_general_ci`

The change is covered by existing database charset unit tests: six tests which previously failed on MariaDB 10.6.1+ or MySQL 8.0.30+ now pass.

Includes:
* Adjusting the expected test results based on MariaDB and MySQL version.
* Using named data providers for the affected tests to make test output more descriptive.
* Adding a failure message to each assertion when multiple assertions are used in the test.
References:
* [https://mariadb.com/kb/en/mariadb-1061-release-notes/ MariaDB 10.6.1 release notes]
* [https://jira.mariadb.org/browse/MDEV-8334 MDEV-8334 Rename utf8 to utf8mb3]
* [https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-30.html#mysqld-8-0-30-charset MySQL 8.0.30 release notes]
* [https://dev.mysql.com/doc/refman/8.0/en/charset-unicode-utf8mb3.html The utf8mb3 Character Set (3-Byte UTF-8 Unicode Encoding)]

Follow-up to [30345], [32162], [37320].

Props skithund, ayeshrajans, JavierCasares, SergeyBiryukov.
Fixes #53623.

git-svn-id: https://develop.svn.wordpress.org/trunk@53918 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
45fbc51aaa
commit
5d78ecbe25
@ -3376,12 +3376,21 @@ class wpdb {
|
||||
}
|
||||
|
||||
// If any of the columns don't have one of these collations, it needs more sanity checking.
|
||||
$safe_collations = array(
|
||||
'utf8_bin',
|
||||
'utf8_general_ci',
|
||||
'utf8mb3_bin',
|
||||
'utf8mb3_general_ci',
|
||||
'utf8mb4_bin',
|
||||
'utf8mb4_general_ci',
|
||||
);
|
||||
|
||||
foreach ( $this->col_meta[ $table ] as $col ) {
|
||||
if ( empty( $col->Collation ) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( ! in_array( $col->Collation, array( 'utf8_general_ci', 'utf8_bin', 'utf8mb4_general_ci', 'utf8mb4_bin' ), true ) ) {
|
||||
if ( ! in_array( $col->Collation, $safe_collations, true ) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -9,18 +9,32 @@
|
||||
class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
|
||||
/**
|
||||
* Our special WPDB
|
||||
* Our special WPDB.
|
||||
*
|
||||
* @var resource
|
||||
*/
|
||||
protected static $_wpdb;
|
||||
|
||||
/**
|
||||
* The version of the MySQL server.
|
||||
* Whether to expect utf8mb3 instead of utf8 in various commands output.
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
private static $utf8_is_utf8mb3 = false;
|
||||
|
||||
/**
|
||||
* The database server version.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private static $server_info;
|
||||
private static $db_version;
|
||||
|
||||
/**
|
||||
* Full database server information.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private static $db_server_info;
|
||||
|
||||
public static function set_up_before_class() {
|
||||
parent::set_up_before_class();
|
||||
@ -29,7 +43,18 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
|
||||
self::$_wpdb = new WpdbExposedMethodsForTesting();
|
||||
|
||||
self::$server_info = self::$_wpdb->db_server_info();
|
||||
self::$db_version = self::$_wpdb->db_version();
|
||||
self::$db_server_info = self::$_wpdb->db_server_info();
|
||||
|
||||
/*
|
||||
* MariaDB 10.6.1 or later and MySQL 8.0.30 or later
|
||||
* use utf8mb3 instead of utf8 in various commands output.
|
||||
*/
|
||||
if ( str_contains( self::$db_server_info, 'MariaDB' ) && version_compare( self::$db_version, '10.6.1', '>=' )
|
||||
|| ! str_contains( self::$db_server_info, 'MariaDB' ) && version_compare( self::$db_version, '8.0.30', '>=' )
|
||||
) {
|
||||
self::$utf8_is_utf8mb3 = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -492,7 +517,9 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
$this->markTestSkipped( "The current MySQL server doesn't support the utf8mb4 character set." );
|
||||
}
|
||||
|
||||
if ( 'big5' === $new_charset && 'byte' === $data[0]['length']['type'] && false !== strpos( self::$server_info, 'MariaDB' ) ) {
|
||||
if ( 'big5' === $new_charset && 'byte' === $data[0]['length']['type']
|
||||
&& str_contains( self::$db_server_info, 'MariaDB' )
|
||||
) {
|
||||
$this->markTestSkipped( "MariaDB doesn't support this data set. See https://core.trac.wordpress.org/ticket/33171." );
|
||||
}
|
||||
|
||||
@ -808,6 +835,10 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
self::$_wpdb->query( $create );
|
||||
|
||||
foreach ( $expected_charset as $column => $charset ) {
|
||||
if ( self::$utf8_is_utf8mb3 && 'utf8' === $charset ) {
|
||||
$charset = 'utf8mb3';
|
||||
}
|
||||
|
||||
$this->assertSame( $charset, self::$_wpdb->get_col_charset( $table, $column ) );
|
||||
$this->assertSame( $charset, self::$_wpdb->get_col_charset( strtoupper( $table ), strtoupper( $column ) ) );
|
||||
}
|
||||
@ -875,27 +906,29 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
public function data_strip_invalid_text_from_query() {
|
||||
$table_name = 'strip_invalid_text_from_query_table';
|
||||
$data = array(
|
||||
array(
|
||||
'utf8 + binary' => array(
|
||||
// Binary tables don't get stripped.
|
||||
'( a VARCHAR(50) CHARACTER SET utf8, b BINARY )', // Create.
|
||||
"('foo\xf0\x9f\x98\x88bar', 'foo')", // Query.
|
||||
"('foo\xf0\x9f\x98\x88bar', 'foo')", // Expected result.
|
||||
'create' => '( a VARCHAR(50) CHARACTER SET utf8, b BINARY )',
|
||||
'query' => "('foo\xf0\x9f\x98\x88bar', 'foo')",
|
||||
'expected' => "('foo\xf0\x9f\x98\x88bar', 'foo')",
|
||||
),
|
||||
array(
|
||||
'utf8 + utf8mb4' => array(
|
||||
// utf8/utf8mb4 tables default to utf8.
|
||||
'( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )',
|
||||
"('foo\xf0\x9f\x98\x88bar', 'foo')",
|
||||
"('foobar', 'foo')",
|
||||
'create' => '( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )',
|
||||
'query' => "('foo\xf0\x9f\x98\x88bar', 'foo')",
|
||||
'expected' => "('foobar', 'foo')",
|
||||
),
|
||||
);
|
||||
|
||||
foreach ( $data as $i => &$value ) {
|
||||
$this_table_name = $table_name . '_' . $i;
|
||||
$i = 0;
|
||||
|
||||
$value[0] = "CREATE TABLE $this_table_name {$value[0]}";
|
||||
$value[1] = "INSERT INTO $this_table_name VALUES {$value[1]}";
|
||||
$value[2] = "INSERT INTO $this_table_name VALUES {$value[2]}";
|
||||
$value[3] = "DROP TABLE IF EXISTS $this_table_name";
|
||||
foreach ( $data as &$value ) {
|
||||
$this_table_name = $table_name . '_' . $i++;
|
||||
|
||||
$value['create'] = "CREATE TABLE $this_table_name {$value['create']}";
|
||||
$value['query'] = "INSERT INTO $this_table_name VALUES {$value['query']}";
|
||||
$value['expected'] = "INSERT INTO $this_table_name VALUES {$value['expected']}";
|
||||
$value['drop'] = "DROP TABLE IF EXISTS $this_table_name";
|
||||
}
|
||||
unset( $value );
|
||||
|
||||
@ -979,42 +1012,44 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
public function data_table_collation_check() {
|
||||
$table_name = 'table_collation_check';
|
||||
$data = array(
|
||||
array(
|
||||
'utf8_bin' => array(
|
||||
// utf8_bin tables don't need extra sanity checking.
|
||||
'( a VARCHAR(50) COLLATE utf8_bin )', // Create.
|
||||
true, // Expected result.
|
||||
'create' => '( a VARCHAR(50) COLLATE utf8_bin )',
|
||||
'expected' => true,
|
||||
),
|
||||
array(
|
||||
'utf8_general_ci' => array(
|
||||
// Neither do utf8_general_ci tables.
|
||||
'( a VARCHAR(50) COLLATE utf8_general_ci )',
|
||||
true,
|
||||
'create' => '( a VARCHAR(50) COLLATE utf8_general_ci )',
|
||||
'expected' => true,
|
||||
),
|
||||
array(
|
||||
'utf8_unicode_ci' => array(
|
||||
// utf8_unicode_ci tables do.
|
||||
'( a VARCHAR(50) COLLATE utf8_unicode_ci )',
|
||||
false,
|
||||
'create' => '( a VARCHAR(50) COLLATE utf8_unicode_ci )',
|
||||
'expected' => false,
|
||||
),
|
||||
array(
|
||||
'utf8_bin + big5_chinese_ci' => array(
|
||||
// utf8_bin tables don't need extra sanity checking,
|
||||
// except for when they're not just utf8_bin.
|
||||
'( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )',
|
||||
false,
|
||||
'create' => '( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )',
|
||||
'expected' => false,
|
||||
),
|
||||
array(
|
||||
'utf8_bin + int' => array(
|
||||
// utf8_bin tables don't need extra sanity checking
|
||||
// when the other columns aren't strings.
|
||||
'( a VARCHAR(50) COLLATE utf8_bin, b INT )',
|
||||
true,
|
||||
'create' => '( a VARCHAR(50) COLLATE utf8_bin, b INT )',
|
||||
'expected' => true,
|
||||
),
|
||||
);
|
||||
|
||||
foreach ( $data as $i => &$value ) {
|
||||
$this_table_name = $table_name . '_' . $i;
|
||||
$i = 0;
|
||||
|
||||
$value[0] = "CREATE TABLE $this_table_name {$value[0]}";
|
||||
$value[2] = "SELECT * FROM $this_table_name WHERE a='\xf0\x9f\x98\x88'";
|
||||
$value[3] = "DROP TABLE IF EXISTS $this_table_name";
|
||||
$value[4] = array(
|
||||
foreach ( $data as &$value ) {
|
||||
$this_table_name = $table_name . '_' . $i++;
|
||||
|
||||
$value['create'] = "CREATE TABLE $this_table_name {$value['create']}";
|
||||
$value['query'] = "SELECT * FROM $this_table_name WHERE a='\xf0\x9f\x98\x88'";
|
||||
$value['drop'] = "DROP TABLE IF EXISTS $this_table_name";
|
||||
$value['always_true'] = array(
|
||||
"SELECT * FROM $this_table_name WHERE a='foo'",
|
||||
"SHOW FULL TABLES LIKE $this_table_name",
|
||||
"DESCRIBE $this_table_name",
|
||||
@ -1040,11 +1075,31 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
self::$_wpdb->query( $create );
|
||||
|
||||
$return = self::$_wpdb->check_safe_collation( $query );
|
||||
$this->assertSame( $expected, $return );
|
||||
$this->assertSame(
|
||||
$expected,
|
||||
$return,
|
||||
sprintf(
|
||||
"wpdb::check_safe_collation() should return %s for this query.\n" .
|
||||
"Table: %s\n" .
|
||||
'Query: %s',
|
||||
$expected ? 'true' : 'false',
|
||||
$create,
|
||||
$query
|
||||
)
|
||||
);
|
||||
|
||||
foreach ( $always_true as $true_query ) {
|
||||
$return = self::$_wpdb->check_safe_collation( $true_query );
|
||||
$this->assertTrue( $return );
|
||||
$this->assertTrue(
|
||||
$return,
|
||||
sprintf(
|
||||
"wpdb::check_safe_collation() should return true for this query.\n" .
|
||||
"Table: %s\n" .
|
||||
'Query: %s',
|
||||
$create,
|
||||
$true_query
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
self::$_wpdb->query( $drop );
|
||||
@ -1115,12 +1170,13 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
*/
|
||||
public function test_set_charset_changes_the_connection_collation() {
|
||||
self::$_wpdb->set_charset( self::$_wpdb->dbh, 'utf8', 'utf8_general_ci' );
|
||||
$results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
|
||||
$this->assertSame( 'utf8_general_ci', $results[0]->Value );
|
||||
$results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
|
||||
$expected = self::$utf8_is_utf8mb3 ? 'utf8mb3_general_ci' : 'utf8_general_ci';
|
||||
$this->assertSame( $expected, $results[0]->Value, "Collation should be set to $expected." );
|
||||
|
||||
self::$_wpdb->set_charset( self::$_wpdb->dbh, 'utf8mb4', 'utf8mb4_unicode_ci' );
|
||||
$results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
|
||||
$this->assertSame( 'utf8mb4_unicode_ci', $results[0]->Value );
|
||||
$this->assertSame( 'utf8mb4_unicode_ci', $results[0]->Value, 'Collation should be set to utf8mb4_unicode_ci.' );
|
||||
|
||||
self::$_wpdb->set_charset( self::$_wpdb->dbh );
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user