mirror of
https://github.com/gosticks/wordpress-develop.git
synced 2025-10-16 12:05:38 +00:00
See #47632. git-svn-id: https://develop.svn.wordpress.org/trunk@45588 602fd350-edb4-49c9-b593-d223f7449a82
1104 lines
31 KiB
PHP
1104 lines
31 KiB
PHP
<?php
|
||
|
||
/**
|
||
* Test WPDB methods
|
||
*
|
||
* @group wpdb
|
||
* @group security-153
|
||
*/
|
||
class Tests_DB_Charset extends WP_UnitTestCase {
|
||
|
||
/**
|
||
* Our special WPDB
|
||
*
|
||
* @var wpdb_exposed_methods_for_testing
|
||
*/
|
||
protected static $_wpdb;
|
||
|
||
/**
|
||
* The version of the MySQL server.
|
||
*
|
||
* @var string
|
||
*/
|
||
private static $server_info;
|
||
|
||
/**
 * Creates the shared wpdb test object and records the database server's info string.
 *
 * db.php is loaded from the parent of this file's directory before the
 * wpdb_exposed_methods_for_testing object is instantiated.
 */
public static function setUpBeforeClass() {
	parent::setUpBeforeClass();

	$parent_dir = dirname( dirname( __FILE__ ) );
	require_once( $parent_dir . '/db.php' );

	self::$_wpdb = new wpdb_exposed_methods_for_testing();
	$handle      = self::$_wpdb->dbh;

	// Use the server-info call that matches the extension behind the connection handle.
	self::$server_info = self::$_wpdb->use_mysqli
		? mysqli_get_server_info( $handle )
		: mysql_get_server_info( $handle );
}
|
||
|
||
/**
 * Data provider for test_strip_invalid_text().
 *
 * Every entry of $fields describes one column value: the column 'charset',
 * an optional 'connection_charset', the raw 'value', the 'expected' value
 * after stripping, and the column 'length' limit (or false for no limit).
 *
 * The multi-byte length cases are built from characters of known UTF-8 width:
 * '²' is two bytes, the fullwidth digit three (U+FF13) is three bytes and
 * '𝟜' is four bytes. The fullwidth digit is written with explicit byte
 * escapes because the byte-length expectations below only add up when it
 * really occupies three bytes (an ASCII '3' would not).
 *
 * @ticket 21212
 *
 * @return array[] List of array( fields, expected fields, test-case name ).
 */
function data_strip_invalid_text() {
	// U+FF13 FULLWIDTH DIGIT THREE, three bytes in UTF-8.
	$three_byte = "\xef\xbc\x93";
	// Two-byte + three-byte character pair.
	$two_three = '²' . $three_byte;
	// Two-byte + three-byte + four-byte character triple.
	$two_three_four = $two_three . '𝟜';

	$fields = array(
		// latin1. latin1 never changes.
		'latin1'                                => array(
			'charset'  => 'latin1',
			'value'    => "\xf0\x9f\x8e\xb7",
			'expected' => "\xf0\x9f\x8e\xb7",
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'latin1_char_length'                    => array(
			'charset'  => 'latin1',
			'value'    => str_repeat( 'A', 11 ),
			'expected' => str_repeat( 'A', 10 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'latin1_byte_length'                    => array(
			'charset'  => 'latin1',
			'value'    => str_repeat( 'A', 11 ),
			'expected' => str_repeat( 'A', 10 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		// ascii gets special treatment, make sure it's covered.
		'ascii'                                 => array(
			'charset'  => 'ascii',
			'value'    => 'Hello World',
			'expected' => 'Hello World',
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'ascii_char_length'                     => array(
			'charset'  => 'ascii',
			'value'    => str_repeat( 'A', 11 ),
			'expected' => str_repeat( 'A', 10 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'ascii_byte_length'                     => array(
			'charset'  => 'ascii',
			'value'    => str_repeat( 'A', 11 ),
			'expected' => str_repeat( 'A', 10 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		// utf8 only allows <= 3-byte chars.
		'utf8'                                  => array(
			'charset'  => 'utf8',
			'value'    => "H€llo\xf0\x9f\x98\x88World¢",
			'expected' => 'H€lloWorld¢',
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'utf8_23char_length'                    => array(
			'charset'  => 'utf8',
			'value'    => str_repeat( $two_three, 10 ),
			'expected' => str_repeat( $two_three, 5 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'utf8_23byte_length'                    => array(
			'charset'  => 'utf8',
			'value'    => str_repeat( $two_three, 10 ),
			// ( 2 + 3 ) bytes * 2 = 10 bytes.
			'expected' => str_repeat( $two_three, 2 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),
		'utf8_3char_length'                     => array(
			'charset'  => 'utf8',
			'value'    => str_repeat( $three_byte, 11 ),
			'expected' => str_repeat( $three_byte, 10 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'utf8_3byte_length'                     => array(
			'charset'  => 'utf8',
			'value'    => str_repeat( $three_byte, 11 ),
			// 3 bytes * 3 = 9 bytes; a fourth character would not fit in 10.
			'expected' => str_repeat( $three_byte, 3 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		// utf8mb3 should behave the same as utf8.
		'utf8mb3'                               => array(
			'charset'  => 'utf8mb3',
			'value'    => "H€llo\xf0\x9f\x98\x88World¢",
			'expected' => 'H€lloWorld¢',
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'utf8mb3_23char_length'                 => array(
			'charset'  => 'utf8mb3',
			'value'    => str_repeat( $two_three, 10 ),
			'expected' => str_repeat( $two_three, 5 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'utf8mb3_23byte_length'                 => array(
			'charset'  => 'utf8mb3',
			'value'    => str_repeat( $two_three, 10 ),
			'expected' => str_repeat( $two_three, 2 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),
		'utf8mb3_3char_length'                  => array(
			'charset'  => 'utf8mb3',
			'value'    => str_repeat( $three_byte, 11 ),
			'expected' => str_repeat( $three_byte, 10 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'utf8mb3_3byte_length'                  => array(
			'charset'  => 'utf8mb3',
			'value'    => str_repeat( $three_byte, 10 ),
			'expected' => str_repeat( $three_byte, 3 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		// utf8mb4 allows 4-byte characters, too.
		'utf8mb4'                               => array(
			'charset'  => 'utf8mb4',
			'value'    => "H€llo\xf0\x9f\x98\x88World¢",
			'expected' => "H€llo\xf0\x9f\x98\x88World¢",
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'utf8mb4_234char_length'                => array(
			'charset'  => 'utf8mb4',
			'value'    => str_repeat( $two_three_four, 10 ),
			// Three full triples plus one more character = 10 characters.
			'expected' => str_repeat( $two_three_four, 3 ) . '²',
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'utf8mb4_234byte_length'                => array(
			'charset'  => 'utf8mb4',
			'value'    => str_repeat( $two_three_four, 10 ),
			// 2 + 3 + 4 = 9 bytes; the next two-byte character would not fit in 10.
			'expected' => $two_three_four,
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),
		'utf8mb4_4char_length'                  => array(
			'charset'  => 'utf8mb4',
			'value'    => str_repeat( '𝟜', 11 ),
			'expected' => str_repeat( '𝟜', 10 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'utf8mb4_4byte_length'                  => array(
			'charset'  => 'utf8mb4',
			'value'    => str_repeat( '𝟜', 10 ),
			'expected' => '𝟜𝟜',
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		'koi8r'                                 => array(
			'charset'  => 'koi8r',
			'value'    => "\xfdord\xf2ress",
			'expected' => "\xfdord\xf2ress",
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'koi8r_char_length'                     => array(
			'charset'  => 'koi8r',
			'value'    => str_repeat( "\xfd\xf2", 10 ),
			'expected' => str_repeat( "\xfd\xf2", 5 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'koi8r_byte_length'                     => array(
			'charset'  => 'koi8r',
			'value'    => str_repeat( "\xfd\xf2", 10 ),
			'expected' => str_repeat( "\xfd\xf2", 5 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		'hebrew'                                => array(
			'charset'  => 'hebrew',
			'value'    => "\xf9ord\xf7ress",
			'expected' => "\xf9ord\xf7ress",
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'hebrew_char_length'                    => array(
			'charset'  => 'hebrew',
			'value'    => str_repeat( "\xf9\xf7", 10 ),
			'expected' => str_repeat( "\xf9\xf7", 5 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'hebrew_byte_length'                    => array(
			'charset'  => 'hebrew',
			'value'    => str_repeat( "\xf9\xf7", 10 ),
			'expected' => str_repeat( "\xf9\xf7", 5 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		'cp1251'                                => array(
			'charset'  => 'cp1251',
			'value'    => "\xd8ord\xd0ress",
			'expected' => "\xd8ord\xd0ress",
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'cp1251_no_length'                      => array(
			'charset'  => 'cp1251',
			'value'    => "\xd8ord\xd0ress",
			'expected' => "\xd8ord\xd0ress",
			'length'   => false,
		),
		'cp1251_no_length_ascii'                => array(
			'charset'  => 'cp1251',
			'value'    => 'WordPress',
			'expected' => 'WordPress',
			'length'   => false,
			// Don't set 'ascii' => true/false.
			// That's a different codepath than it being unset, even if
			// there's only ASCII in the value.
		),
		'cp1251_char_length'                    => array(
			'charset'  => 'cp1251',
			'value'    => str_repeat( "\xd8\xd0", 10 ),
			'expected' => str_repeat( "\xd8\xd0", 5 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'cp1251_byte_length'                    => array(
			'charset'  => 'cp1251',
			'value'    => str_repeat( "\xd8\xd0", 10 ),
			'expected' => str_repeat( "\xd8\xd0", 5 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		'tis620'                                => array(
			'charset'  => 'tis620',
			'value'    => "\xccord\xe3ress",
			'expected' => "\xccord\xe3ress",
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		),
		'tis620_char_length'                    => array(
			'charset'  => 'tis620',
			'value'    => str_repeat( "\xcc\xe3", 10 ),
			'expected' => str_repeat( "\xcc\xe3", 5 ),
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		),
		'tis620_byte_length'                    => array(
			'charset'  => 'tis620',
			'value'    => str_repeat( "\xcc\xe3", 10 ),
			'expected' => str_repeat( "\xcc\xe3", 5 ),
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		),

		'ujis_with_utf8_connection'             => array(
			'charset'            => 'ujis',
			'connection_charset' => 'utf8',
			'value'              => '自動下書き',
			'expected'           => '自動下書き',
			'length'             => array(
				'type'   => 'byte',
				'length' => 100,
			),
		),
		'ujis_with_utf8_connection_char_length' => array(
			'charset'            => 'ujis',
			'connection_charset' => 'utf8',
			'value'              => '自動下書き',
			'expected'           => '自動下書',
			'length'             => array(
				'type'   => 'char',
				'length' => 4,
			),
		),
		'ujis_with_utf8_connection_byte_length' => array(
			'charset'            => 'ujis',
			'connection_charset' => 'utf8',
			'value'              => '自動下書き',
			'expected'           => '自動',
			'length'             => array(
				'type'   => 'byte',
				'length' => 6,
			),
		),

		'false'                                 => array(
			// false is a column with no character set (ie, a number column).
			'charset'  => false,
			'value'    => 100,
			'expected' => 100,
			'length'   => false,
		),
	);

	if ( function_exists( 'mb_convert_encoding' ) ) {
		// big5 is a non-Unicode multibyte charset.
		$utf8      = "a\xe5\x85\xb1b"; // UTF-8 Character 20849
		$big5      = mb_convert_encoding( $utf8, 'BIG-5', 'UTF-8' );
		$conv_utf8 = mb_convert_encoding( $big5, 'UTF-8', 'BIG-5' );

		// Make sure PHP's multibyte conversions are working correctly.
		$this->assertNotEquals( $utf8, $big5 );
		$this->assertEquals( $utf8, $conv_utf8 );

		$fields['big5'] = array(
			'charset'  => 'big5',
			'value'    => $big5,
			'expected' => $big5,
			'length'   => array(
				'type'   => 'char',
				'length' => 100,
			),
		);

		$fields['big5_char_length'] = array(
			'charset'  => 'big5',
			'value'    => str_repeat( $big5, 10 ),
			'expected' => str_repeat( $big5, 3 ) . 'a',
			'length'   => array(
				'type'   => 'char',
				'length' => 10,
			),
		);

		$fields['big5_byte_length'] = array(
			'charset'  => 'big5',
			'value'    => str_repeat( $big5, 10 ),
			'expected' => str_repeat( $big5, 2 ) . 'a',
			'length'   => array(
				'type'   => 'byte',
				'length' => 10,
			),
		);
	}

	// The data above is easy to edit. Now, prepare it for the data provider.
	$data_provider = array();
	foreach ( $fields as $test_case => $field ) {
		// The expected field is the input field with 'value' swapped for 'expected'
		// and without the test-only 'connection_charset' hint.
		$expected          = $field;
		$expected['value'] = $expected['expected'];
		unset( $expected['expected'], $field['expected'], $expected['connection_charset'] );

		// strip_invalid_text() expects an array of fields. We're testing one field at a time.
		// First argument is field data. Second is expected. Third is the message.
		$data_provider[] = array( array( $field ), array( $expected ), $test_case );
	}

	return $data_provider;
}
|
||
|
||
/**
 * Strips a single field while the connection is switched to the charset under test.
 *
 * The previous charset is put back on the shared wpdb object before the
 * assertion runs.
 *
 * @dataProvider data_strip_invalid_text
 * @ticket 21212
 *
 * @param array  $data     Single-element list holding the field to strip.
 * @param array  $expected Single-element list holding the field as it should come back.
 * @param string $message  Test-case name, used as the assertion message.
 */
function test_strip_invalid_text( $data, $expected, $message ) {
	if ( version_compare( PHP_VERSION, '5.3', '<' ) && stristr( php_uname( 's' ), 'win' ) ) {
		$this->markTestSkipped( 'This test fails in PHP 5.2 on Windows. See https://core.trac.wordpress.org/ticket/31262' );
	}

	$original_charset = self::$_wpdb->charset;

	// An explicit connection charset wins over the column charset, and is
	// removed from the field before it is handed to strip_invalid_text().
	$connection_charset = $data[0]['charset'];
	if ( isset( $data[0]['connection_charset'] ) ) {
		$connection_charset = $data[0]['connection_charset'];
		unset( $data[0]['connection_charset'] );
	}

	if ( 'utf8mb4' === $connection_charset ) {
		if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) ) {
			$this->markTestSkipped( "The current MySQL server doesn't support the utf8mb4 character set." );
		}
	}

	// The length type is only inspected for big5 fields.
	if ( 'big5' === $connection_charset && 'byte' === $data[0]['length']['type'] ) {
		if ( false !== strpos( self::$server_info, 'MariaDB' ) ) {
			$this->markTestSkipped( "MariaDB doesn't support this data set. See https://core.trac.wordpress.org/ticket/33171." );
		}
	}

	self::$_wpdb->charset = $connection_charset;
	self::$_wpdb->set_charset( self::$_wpdb->dbh, $connection_charset );

	$stripped = self::$_wpdb->strip_invalid_text( $data );

	self::$_wpdb->charset = $original_charset;
	self::$_wpdb->set_charset( self::$_wpdb->dbh, $original_charset );

	$this->assertSame( $expected, $stripped, $message );
}
|
||
|
||
/**
 * process_fields() must return false when a value contains bytes that are invalid for the column.
 *
 * @ticket 21212
 */
function test_process_fields_failure() {
	global $wpdb;

	$column_charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
	if ( ! in_array( $column_charset, array( 'utf8', 'utf8mb4' ), true ) ) {
		$this->markTestSkipped( 'This test requires a utf8 character set' );
	}

	// \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
	$invalid_content = "H€llo\xf0\xff\xff\xffWorld¢";
	$processed       = self::$_wpdb->process_fields(
		$wpdb->posts,
		array( 'post_content' => $invalid_content ),
		null
	);

	$this->assertFalse( $processed );
}
|
||
|
||
/**
 * Data provider for test_process_field_charsets().
 *
 * Each field set is keyed by column name. 'value' and 'format' make up the
 * input; 'charset' is only part of the expected return and is removed from
 * the input copy.
 *
 * The sets are listed in an explicit named map so that adding a local
 * variable to this method can never turn into an accidental data set.
 *
 * @ticket 21212
 *
 * @return array[] List of array( input fields, expected fields, set name ).
 */
function data_process_field_charsets() {
	$wpdb = $GLOBALS['wpdb'];

	// Prefer the connection charset; fall back to the posts.post_content column charset.
	$charset = $wpdb->charset;
	if ( ! $charset ) {
		$charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
	}

	$field_sets = array(
		'no_string_fields'        => array(
			'post_parent'   => array(
				'value'   => 10,
				'format'  => '%d',
				'charset' => false,
			),
			'comment_count' => array(
				'value'   => 0,
				'format'  => '%d',
				'charset' => false,
			),
		),

		'all_ascii_fields'        => array(
			'post_content' => array(
				'value'   => 'foo foo foo!',
				'format'  => '%s',
				'charset' => $charset,
			),
			'post_excerpt' => array(
				'value'   => 'bar bar bar!',
				'format'  => '%s',
				'charset' => $charset,
			),
		),

		// This is the same data used in process_field_charsets_for_nonexistent_table()
		'non_ascii_string_fields' => array(
			'post_content' => array(
				'value'   => '¡foo foo foo!',
				'format'  => '%s',
				'charset' => $charset,
			),
			'post_excerpt' => array(
				'value'   => '¡bar bar bar!',
				'format'  => '%s',
				'charset' => $charset,
			),
		),
	);

	$data_provider = array();
	foreach ( $field_sets as $set_name => $expected ) {
		$data = array();
		foreach ( $expected as $column => $field ) {
			// 'charset' and 'ascii' are part of the expected return only.
			unset( $field['charset'], $field['ascii'] );
			$data[ $column ] = $field;
		}

		$data_provider[] = array( $data, $expected, $set_name );
	}

	return $data_provider;
}
|
||
|
||
/**
 * process_field_charsets() should return the input fields with their charset added.
 *
 * @dataProvider data_process_field_charsets
 * @ticket 21212
 *
 * @param array  $data     Fields keyed by column name, without charset information.
 * @param array  $expected The same fields including the expected 'charset' entries.
 * @param string $message  Name of the data set, used as the assertion message.
 */
function test_process_field_charsets( $data, $expected, $message ) {
	$posts_table = $GLOBALS['wpdb']->posts;
	$processed   = self::$_wpdb->process_field_charsets( $data, $posts_table );

	$this->assertSame( $expected, $processed, $message );
}
|
||
|
||
/**
 * process_field_charsets() must return false for a table that does not exist.
 *
 * The test this test depends on first verifies that this
 * would normally work against the posts table.
 *
 * @ticket 21212
 * @depends test_process_field_charsets
 */
function test_process_field_charsets_on_nonexistent_table() {
	$data = array(
		'post_content' => array(
			'value'  => '¡foo foo foo!',
			'format' => '%s',
		),
	);

	// Remember the previous suppression setting and put it back before asserting,
	// so a failing assertion cannot leave errors suppressed on the shared wpdb object.
	$was_suppressed = self::$_wpdb->suppress_errors( true );
	$result         = self::$_wpdb->process_field_charsets( $data, 'nonexistent_table' );
	self::$_wpdb->suppress_errors( $was_suppressed );

	$this->assertFalse( $result );
}
|
||
|
||
/**
 * check_ascii() should accept a string made up only of ASCII characters.
 *
 * @ticket 21212
 */
function test_check_ascii() {
	$whitespace_and_quote = "\0\t\n\r '";
	$printable            = '!"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~';

	$is_ascii = self::$_wpdb->check_ascii( $whitespace_and_quote . $printable );

	$this->assertTrue( $is_ascii );
}
|
||
|
||
/**
 * check_ascii() should reject a string containing non-ASCII characters.
 *
 * @ticket 21212
 */
function test_check_ascii_false() {
	$with_non_ascii = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ¡©«';
	$is_ascii       = self::$_wpdb->check_ascii( $with_non_ascii );

	$this->assertFalse( $is_ascii );
}
|
||
|
||
/**
 * Invalid byte sequences should be removed from a value destined for posts.post_content.
 *
 * @ticket 21212
 */
function test_strip_invalid_text_for_column() {
	global $wpdb;

	$column_charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
	if ( ! in_array( $column_charset, array( 'utf8', 'utf8mb4' ), true ) ) {
		$this->markTestSkipped( 'This test requires a utf8 character set' );
	}

	// Invalid 3-byte and 4-byte sequences
	$dirty = "H€llo\xe0\x80\x80World\xf0\xff\xff\xff¢";
	$clean = 'H€lloWorld¢';

	$this->assertEquals(
		$clean,
		$wpdb->strip_invalid_text_for_column( $wpdb->posts, 'post_content', $dirty )
	);
}
|
||
|
||
/**
 * Table definitions shared by the get_table_charset() and get_col_charset() tests.
 *
 * Each entry holds the column 'definition' appended to a CREATE TABLE
 * statement, the charset expected for the whole table ('table_expected'),
 * and the charset expected per column ('column_expected', keyed by column name).
 *
 * @var array
 */
protected $table_and_column_defs = array(
	array(
		'definition'      => '( a INT, b FLOAT )',
		'table_expected'  => false,
		'column_expected' => array( 'a' => false, 'b' => false ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET big5 )',
		'table_expected'  => 'big5',
		'column_expected' => array( 'a' => 'big5', 'b' => 'big5' ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b BINARY )',
		'table_expected'  => 'binary',
		'column_expected' => array( 'a' => 'big5', 'b' => false ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET latin1, b BLOB )',
		'table_expected'  => 'binary',
		'column_expected' => array( 'a' => 'latin1', 'b' => false ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET latin1, b TEXT CHARACTER SET koi8r )',
		'table_expected'  => 'koi8r',
		'column_expected' => array( 'a' => 'latin1', 'b' => 'koi8r' ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET utf8mb3, b TEXT CHARACTER SET utf8mb3 )',
		'table_expected'  => 'utf8',
		'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8' ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET utf8, b TEXT CHARACTER SET utf8mb4 )',
		'table_expected'  => 'utf8',
		'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8mb4' ),
	),
	array(
		'definition'      => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET koi8r )',
		'table_expected'  => 'ascii',
		'column_expected' => array( 'a' => 'big5', 'b' => 'koi8r' ),
	),
);
|
||
|
||
/**
 * Data provider for test_get_table_charset().
 *
 * @ticket 21212
 *
 * @return array[] List of array( DROP statement, CREATE statement, table name, expected table charset ).
 */
function data_test_get_table_charset() {
	$provided = array();

	foreach ( $this->table_and_column_defs as $index => $def ) {
		// One uniquely named table per definition.
		$table = 'test_get_table_charset' . '_' . $index;

		$provided[] = array(
			"DROP TABLE IF EXISTS $table",
			"CREATE TABLE $table {$def['definition']}",
			$table,
			$def['table_expected'],
		);
	}

	return $provided;
}
|
||
|
||
/**
 * get_table_charset() should report the expected charset for a freshly created table,
 * regardless of the letter case used for the table name.
 *
 * @dataProvider data_test_get_table_charset
 * @ticket 21212
 *
 * @param string       $drop             DROP TABLE IF EXISTS statement for the test table.
 * @param string       $create           CREATE TABLE statement for the test table.
 * @param string       $table            Name of the test table.
 * @param string|false $expected_charset Charset expected for the table.
 */
function test_get_table_charset( $drop, $create, $table, $expected_charset ) {
	self::$_wpdb->query( $drop );

	if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
		// markTestSkipped() aborts the test, so no explicit return is needed.
		$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
	}

	self::$_wpdb->query( $create );

	$charset       = self::$_wpdb->get_table_charset( $table );
	$charset_upper = self::$_wpdb->get_table_charset( strtoupper( $table ) );

	// Clean up before asserting so a failure does not leave the table behind.
	self::$_wpdb->query( $drop );

	// Expected value goes first so failure messages read the right way round.
	$this->assertEquals( $expected_charset, $charset );
	$this->assertEquals( $expected_charset, $charset_upper );
}
|
||
|
||
/**
 * Data provider for the get_col_charset() tests.
 *
 * @ticket 21212
 *
 * @return array[] List of array( DROP statement, CREATE statement, table name, expected charsets keyed by column ).
 */
function data_test_get_column_charset() {
	$provided = array();

	foreach ( $this->table_and_column_defs as $index => $def ) {
		// One uniquely named table per definition.
		$table = 'test_get_column_charset' . '_' . $index;

		$provided[] = array(
			"DROP TABLE IF EXISTS $table",
			"CREATE TABLE $table {$def['definition']}",
			$table,
			$def['column_expected'],
		);
	}

	return $provided;
}
|
||
|
||
/**
 * get_col_charset() should report the expected charset for every column,
 * both with the names as created and with upper-cased table and column names.
 *
 * @dataProvider data_test_get_column_charset
 * @ticket 21212
 *
 * @param string $drop             DROP TABLE IF EXISTS statement for the test table.
 * @param string $create           CREATE TABLE statement for the test table.
 * @param string $table            Name of the test table.
 * @param array  $expected_charset Expected charsets keyed by column name.
 */
function test_get_column_charset( $drop, $create, $table, $expected_charset ) {
	$db = self::$_wpdb;

	$db->query( $drop );

	if ( ! $db->has_cap( 'utf8mb4' ) ) {
		if ( preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
			return;
		}
	}

	$db->query( $create );

	$upper_table = strtoupper( $table );
	foreach ( $expected_charset as $column => $charset ) {
		$as_created = $db->get_col_charset( $table, $column );
		$this->assertEquals( $charset, $as_created );

		$upper_cased = $db->get_col_charset( $upper_table, strtoupper( $column ) );
		$this->assertEquals( $charset, $upper_cased );
	}

	$db->query( $drop );
}
|
||
|
||
/**
 * get_col_charset() should return false for every column while is_mysql is false.
 *
 * @dataProvider data_test_get_column_charset
 * @ticket 21212
 *
 * @param string $drop    DROP TABLE IF EXISTS statement for the test table.
 * @param string $create  CREATE TABLE statement for the test table.
 * @param string $table   Name of the test table.
 * @param array  $columns Expected charsets keyed by column name; only the names are used here.
 */
function test_get_column_charset_non_mysql( $drop, $create, $table, $columns ) {
	self::$_wpdb->query( $drop );

	if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
		// markTestSkipped() aborts the test, so no explicit return is needed.
		$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
	}

	self::$_wpdb->is_mysql = false;

	self::$_wpdb->query( $create );

	// Look up by column NAME. Iterating array_keys() as key => value would
	// hand the numeric index to get_col_charset() instead of the name.
	$charsets = array();
	foreach ( array_keys( $columns ) as $column ) {
		$charsets[ $column ] = self::$_wpdb->get_col_charset( $table, $column );
	}

	// Restore shared state before asserting so a failure cannot leak into other tests.
	self::$_wpdb->query( $drop );
	self::$_wpdb->is_mysql = true;

	foreach ( $charsets as $column => $charset ) {
		$this->assertEquals( false, $charset, "Column: $column" );
	}
}
|
||
|
||
/**
 * get_col_charset() should return false for every column while the is_mysql property is unset.
 *
 * @dataProvider data_test_get_column_charset
 * @ticket 33501
 *
 * @param string $drop    DROP TABLE IF EXISTS statement for the test table.
 * @param string $create  CREATE TABLE statement for the test table.
 * @param string $table   Name of the test table.
 * @param array  $columns Expected charsets keyed by column name; only the names are used here.
 */
function test_get_column_charset_is_mysql_undefined( $drop, $create, $table, $columns ) {
	self::$_wpdb->query( $drop );

	if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
		// markTestSkipped() aborts the test, so no explicit return is needed.
		$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
	}

	unset( self::$_wpdb->is_mysql );

	self::$_wpdb->query( $create );

	// Look up by column NAME. Iterating array_keys() as key => value would
	// hand the numeric index to get_col_charset() instead of the name.
	$charsets = array();
	foreach ( array_keys( $columns ) as $column ) {
		$charsets[ $column ] = self::$_wpdb->get_col_charset( $table, $column );
	}

	// Restore shared state before asserting so a failure cannot leak into other tests.
	self::$_wpdb->query( $drop );
	self::$_wpdb->is_mysql = true;

	foreach ( $charsets as $column => $charset ) {
		$this->assertEquals( false, $charset, "Column: $column" );
	}
}
|
||
|
||
/**
 * Data provider for test_strip_invalid_text_from_query().
 *
 * @ticket 21212
 *
 * @return array[] List of array( CREATE statement, INSERT query, expected INSERT query, DROP statement ).
 */
function data_strip_invalid_text_from_query() {
	// Each case: column definition, VALUES clause to send, VALUES clause expected back.
	$cases = array(
		// binary tables don't get stripped
		array(
			'( a VARCHAR(50) CHARACTER SET utf8, b BINARY )',
			"('foo\xf0\x9f\x98\x88bar', 'foo')",
			"('foo\xf0\x9f\x98\x88bar', 'foo')",
		),
		// utf8/utf8mb4 tables default to utf8
		array(
			'( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )',
			"('foo\xf0\x9f\x98\x88bar', 'foo')",
			"('foobar', 'foo')",
		),
	);

	$provided = array();
	foreach ( $cases as $index => $case ) {
		list( $definition, $values, $expected_values ) = $case;

		$table = 'strip_invalid_text_from_query_table' . '_' . $index;

		$provided[ $index ] = array(
			"CREATE TABLE $table $definition",
			"INSERT INTO $table VALUES $values",
			"INSERT INTO $table VALUES $expected_values",
			"DROP TABLE IF EXISTS $table",
		);
	}

	return $provided;
}
|
||
|
||
/**
 * strip_invalid_text_from_query() should return the expected query for the table's charset.
 *
 * @dataProvider data_strip_invalid_text_from_query
 * @ticket 21212
 *
 * @param string $create   CREATE TABLE statement for the test table.
 * @param string $query    INSERT query to run through the stripper.
 * @param string $expected INSERT query expected back.
 * @param string $drop     DROP TABLE IF EXISTS statement for the test table.
 */
function test_strip_invalid_text_from_query( $create, $query, $expected, $drop ) {
	$db = self::$_wpdb;

	$db->query( $drop );

	if ( ! $db->has_cap( 'utf8mb4' ) ) {
		if ( preg_match( '/utf8mb[34]/i', $create ) ) {
			$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
			return;
		}
	}

	$db->query( $create );

	$stripped = $db->strip_invalid_text_from_query( $query );
	$this->assertEquals( $expected, $stripped );

	$db->query( $drop );
}
|
||
|
||
/**
 * Data provider for test_dont_strip_text_from_schema_queries().
 *
 * @ticket 32104
 *
 * @return array[] List of single-element arrays, each holding one schema query.
 */
function data_dont_strip_text_from_schema_queries() {
	// An obviously invalid and fake table name.
	$fake_table = "\xff\xff\xff\xff";

	$schema_queries = array(
		"SHOW CREATE TABLE $fake_table",
		"DESCRIBE $fake_table",
		"DESC $fake_table",
		"EXPLAIN SELECT * FROM $fake_table",
		"CREATE $fake_table( a VARCHAR(100))",
	);

	// Wrap every query in its own argument list.
	$provided = array();
	foreach ( $schema_queries as $index => $schema_query ) {
		$provided[ $index ] = array( $schema_query );
	}

	return $provided;
}
|
||
|
||
/**
 * Schema queries should come back from strip_invalid_text_from_query() unchanged.
 *
 * @dataProvider data_dont_strip_text_from_schema_queries
 * @ticket 32104
 *
 * @param string $query Schema query containing an invalid table name.
 */
function test_dont_strip_text_from_schema_queries( $query ) {
	$this->assertEquals(
		$query,
		self::$_wpdb->strip_invalid_text_from_query( $query )
	);
}
|
||
|
||
/**
 * A query carrying invalid bytes for a utf8 column should make query() return false.
 *
 * @ticket 21212
 */
function test_invalid_characters_in_query() {
	global $wpdb;

	$column_charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
	if ( ! in_array( $column_charset, array( 'utf8', 'utf8mb4' ), true ) ) {
		$this->markTestSkipped( 'This test requires a utf8 character set' );
	}

	$insert = "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')";
	$result = $wpdb->query( $insert );

	$this->assertFalse( $result );
}
|
||
|
||
/**
 * Data provider for test_table_collation_check().
 *
 * @ticket 21212
 *
 * @return array[] List of array( CREATE statement, expected result, query to check,
 *                 DROP statement, list of queries that must always pass the check ).
 */
function data_table_collation_check() {
	// Each case: column definition, expected check_safe_collation() result.
	$cases = array(
		// utf8_bin tables don't need extra sanity checking.
		array( '( a VARCHAR(50) COLLATE utf8_bin )', true ),

		// Neither do utf8_general_ci tables.
		array( '( a VARCHAR(50) COLLATE utf8_general_ci )', true ),

		// utf8_unicode_ci tables do.
		array( '( a VARCHAR(50) COLLATE utf8_unicode_ci )', false ),

		// utf8_bin tables don't need extra sanity checking,
		// except for when they're not just utf8_bin.
		array( '( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )', false ),

		// utf8_bin tables don't need extra sanity checking
		// when the other columns aren't strings.
		array( '( a VARCHAR(50) COLLATE utf8_bin, b INT )', true ),
	);

	$provided = array();
	foreach ( $cases as $index => $case ) {
		list( $definition, $is_safe ) = $case;

		$table = 'table_collation_check' . '_' . $index;

		$always_safe = array(
			"SELECT * FROM $table WHERE a='foo'",
			"SHOW FULL TABLES LIKE $table",
			"DESCRIBE $table",
			"DESC $table",
			"EXPLAIN SELECT * FROM $table",
		);

		$provided[ $index ] = array(
			"CREATE TABLE $table $definition",
			$is_safe,
			"SELECT * FROM $table WHERE a='\xf0\x9f\x98\x88'",
			"DROP TABLE IF EXISTS $table",
			$always_safe,
		);
	}

	return $provided;
}
|
||
|
||
|
||
/**
 * check_safe_collation() should return the expected result for the query under test,
 * and true for every query in the always-true list.
 *
 * @dataProvider data_table_collation_check
 * @ticket 21212
 *
 * @param string   $create      CREATE TABLE statement for the test table.
 * @param bool     $expected    Expected check_safe_collation() result for $query.
 * @param string   $query       Query to check.
 * @param string   $drop        DROP TABLE IF EXISTS statement for the test table.
 * @param string[] $always_true Queries for which the check must return true.
 */
function test_table_collation_check( $create, $expected, $query, $drop, $always_true ) {
	$db = self::$_wpdb;

	// Start from a clean slate, then build the table under test.
	$db->query( $drop );
	$db->query( $create );

	$is_safe = $db->check_safe_collation( $query );
	$this->assertEquals( $expected, $is_safe );

	foreach ( $always_true as $safe_query ) {
		$this->assertTrue( $db->check_safe_collation( $safe_query ) );
	}

	$db->query( $drop );
}
|
||
|
||
/**
 * ASCII input one byte longer than the column limit should come back cut to the limit.
 */
function test_strip_invalid_text_for_column_bails_if_ascii_input_too_long() {
	global $wpdb;

	// Column name => length expected after stripping.
	$limits = array(
		// TEXT column
		'comment_content' => 65535,
		// VARCHAR column
		'comment_agent'   => 255,
	);

	foreach ( $limits as $column => $limit ) {
		$too_long = str_repeat( 'A', $limit + 1 );
		$stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $column, $too_long );

		$this->assertEquals( $limit, strlen( $stripped ) );
	}
}
|
||
|
||
/**
 * A plain-ASCII INSERT into a cp1251 table should pass through the stripper unchanged.
 *
 * @ticket 32279
 */
function test_strip_invalid_text_from_query_cp1251_is_safe() {
	$table = 'test_cp1251_query_' . rand_str( 5 );

	$created = self::$_wpdb->query( "CREATE TABLE $table ( a VARCHAR(50) ) DEFAULT CHARSET 'cp1251'" );
	if ( ! $created ) {
		$this->markTestSkipped( "Test requires the 'cp1251' charset" );
	}

	$insert   = "INSERT INTO $table( `a` ) VALUES( 'safe data' )";
	$stripped = self::$_wpdb->strip_invalid_text_from_query( $insert );

	// Remove the table before asserting.
	self::$_wpdb->query( "DROP TABLE $table" );

	$this->assertEquals( $insert, $stripped );
}
|
||
|
||
/**
 * With the wpdb charset property emptied, a plain-ASCII INSERT into a cp1251
 * table should still pass through the stripper unchanged.
 *
 * @ticket 34708
 */
function test_no_db_charset_defined() {
	$table = 'test_cp1251_query_' . rand_str( 5 );

	$created = self::$_wpdb->query( "CREATE TABLE $table ( a VARCHAR(50) ) DEFAULT CHARSET 'cp1251'" );
	if ( ! $created ) {
		$this->markTestSkipped( "Test requires the 'cp1251' charset" );
	}

	// Blank out the charset for the duration of the call.
	$saved_charset        = self::$_wpdb->charset;
	self::$_wpdb->charset = '';

	$insert   = "INSERT INTO $table( `a` ) VALUES( 'safe data' )";
	$stripped = self::$_wpdb->strip_invalid_text_from_query( $insert );

	// Remove the table and restore the charset before asserting.
	self::$_wpdb->query( "DROP TABLE $table" );
	self::$_wpdb->charset = $saved_charset;

	$this->assertEquals( $insert, $stripped );
}
|
||
|
||
/**
 * set_charset() should change the connection's collation_connection variable.
 *
 * The utf8mb4 case is only exercised when the server reports utf8mb4 support,
 * matching the capability checks used by the other tests in this class.
 * The connection is reset with set_charset( $dbh ) before any assertion runs,
 * so a failure cannot leave the shared connection on a test charset.
 *
 * @ticket 36649
 */
function test_set_charset_changes_the_connection_collation() {
	// Charset => collation pairs to apply to the connection.
	$collations = array(
		'utf8' => 'utf8_general_ci',
	);
	if ( self::$_wpdb->has_cap( 'utf8mb4' ) ) {
		$collations['utf8mb4'] = 'utf8mb4_unicode_ci';
	}

	$reported = array();
	foreach ( $collations as $charset => $collation ) {
		self::$_wpdb->set_charset( self::$_wpdb->dbh, $charset, $collation );
		$results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );

		$reported[ $charset ] = $results[0]->Value;
	}

	// Restore the connection before asserting.
	self::$_wpdb->set_charset( self::$_wpdb->dbh );

	foreach ( $collations as $charset => $collation ) {
		$this->assertEquals( $collation, $reported[ $charset ] );
	}
}
|
||
}
|