WPDB: When checking that a string can be sent to MySQL, we shouldn't use mb_convert_encoding(), as it behaves differently to MySQL's character encoding conversion.

Props mdawaffe, pento, nbachiyski, jorbin, johnjamesjacoby, jeremyfelt.

See #32165.



git-svn-id: https://develop.svn.wordpress.org/trunk@32364 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast
2015-05-06 02:59:50 +00:00
parent 35db6d722b
commit 2ce97b2984
9 changed files with 622 additions and 106 deletions

View File

@@ -121,7 +121,8 @@ class Tests_Comment extends WP_UnitTestCase {
$_SERVER['REMOTE_ADDR'] = '';
}
$post_id = $this->factory->post->create();
$u = $this->factory->user->create();
$post_id = $this->factory->post->create( array( 'post_author' => $u ) );
$data = array(
'comment_post_ID' => $post_id,
@@ -136,7 +137,9 @@ class Tests_Comment extends WP_UnitTestCase {
$id = wp_new_comment( $data );
$this->assertFalse( $id );
$comment = get_comment( $id );
$this->assertEquals( strlen( $comment->comment_content ), 65535 );
// Cleanup.
if ( isset( $remote_addr ) ) {

View File

@@ -2,13 +2,166 @@
/**
* @group compat
* @group security-153
*/
class Tests_Compat extends WP_UnitTestCase {
function test_mb_substr() {
$this->assertEquals('баб', _mb_substr('баба', 0, 3));
$this->assertEquals('баб', _mb_substr('баба', 0, -1));
$this->assertEquals('баб', _mb_substr('баба', 0, -1));
$this->assertEquals('I am your б', _mb_substr('I am your баба', 0, 11));
/**
 * Data provider: UTF-8 strings with their expected lengths.
 *
 * The rows cover characters that encode to 1, 2, 3 and 4 bytes, so that the
 * character count and the byte count differ in a known way.
 *
 * @return array[] Rows of ( string, character_length, byte_length ).
 */
function utf8_string_lengths() {
	// U+FF13 FULLWIDTH DIGIT THREE, a 3-byte character. Written as byte escapes
	// so the character cannot be silently dropped by tools that mangle it.
	$three_byte_char = "\xEF\xBC\x93";

	return array(
		// string, character_length, byte_length
		array( 'баба', 4, 8 ),
		array( 'баб', 3, 6 ),
		array( 'I am your б', 11, 12 ),
		// Ten 1-byte characters.
		array( '1111111111', 10, 10 ),
		// Ten 2-byte characters.
		array( '²²²²²²²²²²', 10, 20 ),
		// Ten 3-byte characters.
		array( str_repeat( $three_byte_char, 10 ), 10, 30 ),
		// Ten 4-byte characters.
		array( '𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜', 10, 40 ),
		// 1-, 2-, 3- and 4-byte characters mixed: ( 1 + 2 + 3 + 4 ) bytes, three times.
		array( str_repeat( '1²' . $three_byte_char . '𝟜', 3 ), 12, 30 ),
	);
}
/**
 * Data provider: UTF-8 strings sliced at a given start and length.
 *
 * Each row carries the substring expected when the offsets count characters,
 * and the substring expected when the same offsets count bytes.
 *
 * @return array[] Rows of ( string, start, length, character_substring, byte_substring ).
 */
function utf8_substrings() {
	$word   = 'баба';
	$phrase = 'I am your баба';

	$cases = array();

	// string, start, length, character_substring, byte_substring
	$cases[] = array( $word, 0, 3, 'баб', "б\xD0" );
	$cases[] = array( $word, 0, -1, 'баб', "баб\xD0" );
	$cases[] = array( $word, 1, null, 'аба', "\xB1аба" );
	$cases[] = array( $word, -3, null, 'аба', "\xB1а" );
	$cases[] = array( $word, -3, 2, 'аб', "\xB1\xD0" );
	$cases[] = array( $word, -1, 2, 'а', "\xB0" );
	$cases[] = array( $phrase, 0, 11, 'I am your б', "I am your \xD0" );

	return $cases;
}
/**
 * Checks that _mb_strlen() reports the expected character count when asked
 * to treat the string as 'UTF-8'.
 *
 * @dataProvider utf8_string_lengths
 */
function test_mb_strlen( $string, $expected_character_length ) {
	$actual_length = _mb_strlen( $string, 'UTF-8' );

	$this->assertEquals( $expected_character_length, $actual_length );
}
/**
 * Checks the 'UTF-8' character count from _mb_strlen() while
 * _wp_can_use_pcre_u() is overridden to false.
 *
 * NOTE(review): the override presumably forces the code path that does not
 * rely on the PCRE /u modifier - confirm against _wp_can_use_pcre_u().
 *
 * @dataProvider utf8_string_lengths
 */
function test_mb_strlen_via_regex( $string, $expected_character_length ) {
	_wp_can_use_pcre_u( false );
	$actual_length = _mb_strlen( $string, 'UTF-8' );

	// Undo the override before asserting. A failing assertion aborts the test
	// method, which would otherwise skip the reset and leave the override in
	// place for whatever runs next.
	_wp_can_use_pcre_u( 'reset' );

	$this->assertEquals( $expected_character_length, $actual_length );
}
/**
 * Checks that _mb_strlen() reports the expected byte count when asked to
 * treat the string as '8bit'.
 *
 * @dataProvider utf8_string_lengths
 */
function test_8bit_mb_strlen( $string, $expected_character_length, $expected_byte_length ) {
	$actual_length = _mb_strlen( $string, '8bit' );

	$this->assertEquals( $expected_byte_length, $actual_length );
}
/**
 * Checks that _mb_substr() returns the expected character-based substring
 * when asked to treat the string as 'UTF-8'.
 *
 * @dataProvider utf8_substrings
 */
function test_mb_substr( $string, $start, $length, $expected_character_substring ) {
	$actual_substring = _mb_substr( $string, $start, $length, 'UTF-8' );

	$this->assertEquals( $expected_character_substring, $actual_substring );
}
/**
 * Checks the 'UTF-8' substring from _mb_substr() while _wp_can_use_pcre_u()
 * is overridden to false.
 *
 * NOTE(review): the override presumably forces the code path that does not
 * rely on the PCRE /u modifier - confirm against _wp_can_use_pcre_u().
 *
 * @dataProvider utf8_substrings
 */
function test_mb_substr_via_regex( $string, $start, $length, $expected_character_substring ) {
	_wp_can_use_pcre_u( false );
	$actual_substring = _mb_substr( $string, $start, $length, 'UTF-8' );

	// Undo the override before asserting. A failing assertion aborts the test
	// method, which would otherwise skip the reset and leave the override in
	// place for whatever runs next.
	_wp_can_use_pcre_u( 'reset' );

	$this->assertEquals( $expected_character_substring, $actual_substring );
}
/**
 * Checks that _mb_substr() returns the expected byte-based substring when
 * asked to treat the string as '8bit'.
 *
 * @dataProvider utf8_substrings
 */
function test_8bit_mb_substr( $string, $start, $length, $expected_character_substring, $expected_byte_substring ) {
	$actual_substring = _mb_substr( $string, $start, $length, '8bit' );

	$this->assertEquals( $expected_byte_substring, $actual_substring );
}
/**
 * Runs _mb_substr() against cases taken from PHP core's own mbstring tests
 * (mb_substr_basic.phpt and mb_substr_variation1.phpt, linked below).
 *
 * The first part slices an ASCII and a multibyte string with explicit
 * encodings. The second part passes a range of non-string values as the
 * string argument and compares each result with a fixed expected string.
 */
function test_mb_substr_phpcore(){
/* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_basic.phpt */
$string_ascii = 'ABCDEF';
// Multibyte sample text, kept base64-encoded in the source.
$string_mb = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
$this->assertEquals( 'DEF', _mb_substr($string_ascii, 3) );
$this->assertEquals( 'DEF', _mb_substr($string_ascii, 3, 5, 'ISO-8859-1') );
// Specify latin-1, as that is the default the core PHP test operates under.
$this->assertEquals( 'peacrOiqng==' , base64_encode( _mb_substr($string_mb, 2, 7, 'latin-1' ) ) );
$this->assertEquals( '6Kqe44OG44Kt44K544OI44Gn44GZ', base64_encode( _mb_substr($string_mb, 2, 7, 'utf-8') ) );
/* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_variation1.phpt */
// Every input below is sliced with the same start and length.
$start = 0;
$length = 5;
// Assign and immediately unset a variable so it can serve as the "unset data" input.
$unset_var = 10;
unset ($unset_var);
$heredoc = <<<EOT
hello world
EOT;
// Values passed as the string argument; the numbered markers group them by type.
$inputs = array(
/*1*/ 0,
1,
12345,
-2345,
// float data
/*5*/ 10.5,
-10.5,
12.3456789000e10,
12.3456789000E-10,
.5,
// null data
/*10*/ NULL,
null,
// boolean data
/*12*/ true,
false,
TRUE,
FALSE,
// empty data
/*16*/ "",
'',
// string data
/*18*/ "string",
'string',
$heredoc,
// object data
/*21*/ new classA(),
// undefined data
/*22*/ @$undefined_var,
// unset data
/*23*/ @$unset_var,
);
// Expected results, matched to $inputs by position.
$outputs = array(
"0",
"1",
"12345",
"-2345",
"10.5",
"-10.5",
"12345",
"1.234",
"0.5",
"",
"",
"1",
"",
"1",
"",
"",
"",
"strin",
"strin",
"hello",
"Class",
"",
"",
);
// $iterator tracks the position of the current input so the matching
// entry of $outputs can be looked up.
$iterator = 0;
foreach($inputs as $input) {
$this->assertEquals( $outputs[$iterator] , _mb_substr($input, $start, $length) );
$iterator++;
}
}
function test_hash_hmac_simple() {
@@ -34,3 +187,10 @@ class Tests_Compat extends WP_UnitTestCase {
$this->assertEquals( array( 'foo' ), $json->decode( '["foo"]' ) );
}
}
/**
 * Fixture object used in test_mb_substr_phpcore.
 *
 * Its only behavior is converting to a fixed string.
 */
class classA {
	/**
	 * @return string The fixed text this object converts to.
	 */
	public function __toString() {
		return 'Class A object';
	}
}

View File

@@ -746,7 +746,6 @@ class Tests_DB extends WP_UnitTestCase {
'value' => '¡foo foo foo!',
'format' => '%s',
'charset' => $expected_charset,
'ascii' => false,
'length' => $wpdb->get_col_length( $wpdb->posts, 'post_content' ),
)
);

View File

@@ -6,6 +6,7 @@ require_once dirname( dirname( __FILE__ ) ) . '/db.php';
* Test WPDB methods
*
* @group wpdb
* @group security-153
*/
class Tests_DB_Charset extends WP_UnitTestCase {
@@ -28,57 +29,227 @@ class Tests_DB_Charset extends WP_UnitTestCase {
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => "\xf0\x9f\x8e\xb7",
'expected' => "\xf0\x9f\x8e\xb7"
'expected' => "\xf0\x9f\x8e\xb7",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'latin1_char_length' => array(
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'latin1_byte_length' => array(
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'ascii' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => 'Hello World',
'expected' => 'Hello World'
'expected' => 'Hello World',
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'ascii_char_length' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'ascii_byte_length' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => 'H€lloWorld¢'
'expected' => 'H€lloWorld¢',
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'utf8_23char_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "²3", 10 ),
'expected' => str_repeat( "²3", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8_23byte_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "²3", 10 ),
'expected' => "²3²3",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8_3char_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "", 11 ),
'expected' => str_repeat( "", 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8_3byte_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "", 11 ),
'expected' => "",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb3' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => 'H€lloWorld¢'
'expected' => 'H€lloWorld¢',
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'utf8mb3_23char_length' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "²3", 10 ),
'expected' => str_repeat( "²3", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb3_23byte_length' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "²3", 10 ),
'expected' => "²3²3",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb3_3char_length' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "", 11 ),
'expected' => str_repeat( "", 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb3_3byte_length' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "", 10 ),
'expected' => "",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb4' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => "H€llo\xf0\x9f\x98\x88World¢"
'expected' => "H€llo\xf0\x9f\x98\x88World¢",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'utf8mb4_234char_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "²3𝟜", 10 ),
'expected' => "²3𝟜²3𝟜²3𝟜²",
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb4_234byte_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "²3𝟜", 10 ),
'expected' => "²3𝟜",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb4_4char_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "𝟜", 11 ),
'expected' => str_repeat( "𝟜", 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb4_4byte_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "𝟜", 10 ),
'expected' => "𝟜𝟜",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'koi8r' => array(
'charset' => 'koi8r',
'value' => "\xfdord\xf2ress",
'expected' => "\xfdord\xf2ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'koi8r_char_length' => array(
'charset' => 'koi8r',
'value' => str_repeat( "\xfd\xf2", 10 ),
'expected' => str_repeat( "\xfd\xf2", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'koi8r_byte_length' => array(
'charset' => 'koi8r',
'value' => str_repeat( "\xfd\xf2", 10 ),
'expected' => str_repeat( "\xfd\xf2", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'hebrew' => array(
'charset' => 'hebrew',
'value' => "\xf9ord\xf7ress",
'expected' => "\xf9ord\xf7ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'hebrew_char_length' => array(
'charset' => 'hebrew',
'value' => str_repeat( "\xf9\xf7", 10 ),
'expected' => str_repeat( "\xf9\xf7", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'hebrew_byte_length' => array(
'charset' => 'hebrew',
'value' => str_repeat( "\xf9\xf7", 10 ),
'expected' => str_repeat( "\xf9\xf7", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'cp1251' => array(
'charset' => 'cp1251',
'value' => "\xd8ord\xd0ress",
'expected' => "\xd8ord\xd0ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'cp1251_char_length' => array(
'charset' => 'cp1251',
'value' => str_repeat( "\xd8\xd0", 10 ),
'expected' => str_repeat( "\xd8\xd0", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'cp1251_byte_length' => array(
'charset' => 'cp1251',
'value' => str_repeat( "\xd8\xd0", 10 ),
'expected' => str_repeat( "\xd8\xd0", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'tis620' => array(
'charset' => 'tis620',
'value' => "\xccord\xe3ress",
'expected' => "\xccord\xe3ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'tis620_char_length' => array(
'charset' => 'tis620',
'value' => str_repeat( "\xcc\xe3", 10 ),
'expected' => str_repeat( "\xcc\xe3", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'tis620_byte_length' => array(
'charset' => 'tis620',
'value' => str_repeat( "\xcc\xe3", 10 ),
'expected' => str_repeat( "\xcc\xe3", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'false' => array(
// false is a column with no character set (ie, a number column)
'charset' => false,
'value' => 100,
'expected' => 100
'expected' => 100,
'length' => false,
),
);
@@ -94,7 +265,22 @@ class Tests_DB_Charset extends WP_UnitTestCase {
$fields['big5'] = array(
'charset' => 'big5',
'value' => $big5,
'expected' => $big5
'expected' => $big5,
'length' => array( 'type' => 'char', 'length' => 100 ),
);
$fields['big5_char_length'] = array(
'charset' => 'big5',
'value' => str_repeat( $big5, 10 ),
'expected' => str_repeat( $big5, 3 ) . 'a',
'length' => array( 'type' => 'char', 'length' => 10 ),
);
$fields['big5_byte_length'] = array(
'charset' => 'big5',
'value' => str_repeat( $big5, 10 ),
'expected' => str_repeat( $big5, 2 ) . 'a',
'length' => array( 'type' => 'byte', 'length' => 10 ),
);
}
@@ -170,14 +356,14 @@ class Tests_DB_Charset extends WP_UnitTestCase {
);
$all_ascii_fields = array(
'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ),
'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ),
'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => $charset ),
'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => $charset ),
);
// This is the same data used in process_field_charsets_for_nonexistent_table()
$non_ascii_string_fields = array(
'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset ),
'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset ),
);
$vars = get_defined_vars();
@@ -544,4 +730,16 @@ class Tests_DB_Charset extends WP_UnitTestCase {
self::$_wpdb->query( $drop );
}
/**
 * Passes an all-'A' string one byte longer than the expected limit to
 * strip_invalid_text_for_column() and checks the length of what comes back.
 */
function test_strip_invalid_test_for_column_bails_if_ascii_input_too_long() {
	global $wpdb;

	// Column of the comments table => length expected after stripping.
	$expected_lengths = array(
		'comment_content' => 65535, // TEXT column
		'comment_agent'   => 255,   // VARCHAR column
	);

	foreach ( $expected_lengths as $column => $expected_length ) {
		$oversized = str_repeat( 'A', $expected_length + 1 );
		$result    = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $column, $oversized );

		$this->assertEquals( $expected_length, strlen( $result ) );
	}
}
}