mirror of
https://github.com/gosticks/wordpress-develop.git
synced 2026-04-04 20:54:29 +00:00
WPDB: When checking that a string can be sent to MySQL, we shouldn't use mb_convert_encoding(), as it behaves differently to MySQL's character encoding conversion.
Props mdawaffe, pento, nbachiyski, jorbin, johnjamesjacoby, jeremyfelt. See #32165. git-svn-id: https://develop.svn.wordpress.org/trunk@32364 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
@@ -13,23 +13,85 @@ if ( !function_exists('_') ) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use.
|
||||
*
|
||||
* @ignore
|
||||
* @since 4.2.2
|
||||
* @access private
|
||||
*
|
||||
* @param bool $set - Used for testing only
|
||||
* null : default - get PCRE/u capability
|
||||
* false : Used for testing - return false for future calls to this function
|
||||
* 'reset': Used for testing - restore default behavior of this function
|
||||
*/
|
||||
function _wp_can_use_pcre_u( $set = null ) {
|
||||
static $utf8_pcre = 'reset';
|
||||
|
||||
if ( null !== $set ) {
|
||||
$utf8_pcre = $set;
|
||||
}
|
||||
|
||||
if ( 'reset' === $utf8_pcre ) {
|
||||
$utf8_pcre = @preg_match( '/^./u', 'a' );
|
||||
}
|
||||
|
||||
return $utf8_pcre;
|
||||
}
|
||||
|
||||
if ( ! function_exists( 'mb_substr' ) ) :
|
||||
function mb_substr( $str, $start, $length = null, $encoding = null ) {
|
||||
return _mb_substr( $str, $start, $length, $encoding );
|
||||
}
|
||||
endif;
|
||||
|
||||
/*
|
||||
* Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
|
||||
* For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
|
||||
* The behavior of this function for invalid inputs is undefined.
|
||||
*/
|
||||
function _mb_substr( $str, $start, $length = null, $encoding = null ) {
|
||||
if ( null === $encoding ) {
|
||||
$encoding = get_option( 'blog_charset' );
|
||||
}
|
||||
|
||||
// The solution below works only for UTF-8,
|
||||
// so in case of a different charset just use built-in substr()
|
||||
$charset = get_option( 'blog_charset' );
|
||||
if ( ! in_array( $charset, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
||||
if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
||||
return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
|
||||
}
|
||||
// Use the regex unicode support to separate the UTF-8 characters into an array
|
||||
preg_match_all( '/./us', $str, $match );
|
||||
$chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
|
||||
return implode( '', $chars );
|
||||
|
||||
if ( _wp_can_use_pcre_u() ) {
|
||||
// Use the regex unicode support to separate the UTF-8 characters into an array
|
||||
preg_match_all( '/./us', $str, $match );
|
||||
$chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
|
||||
return implode( '', $chars );
|
||||
}
|
||||
|
||||
$regex = '/(
|
||||
[\x00-\x7F] # single-byte sequences 0xxxxxxx
|
||||
| [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
|
||||
| [\xE1-\xEC][\x80-\xBF]{2}
|
||||
| \xED[\x80-\x9F][\x80-\xBF]
|
||||
| [\xEE-\xEF][\x80-\xBF]{2}
|
||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2}
|
||||
)/x';
|
||||
|
||||
$chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop
|
||||
do {
|
||||
// We had some string left over from the last round, but we counted it in that last round.
|
||||
array_pop( $chars );
|
||||
|
||||
// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
|
||||
$pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
||||
|
||||
$chars = array_merge( $chars, $pieces );
|
||||
} while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.
|
||||
|
||||
return join( '', array_slice( $chars, $start, $length ) );
|
||||
}
|
||||
|
||||
if ( ! function_exists( 'mb_strlen' ) ) :
|
||||
@@ -38,16 +100,54 @@ if ( ! function_exists( 'mb_strlen' ) ) :
|
||||
}
|
||||
endif;
|
||||
|
||||
/*
|
||||
* Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
|
||||
* For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
|
||||
* The behavior of this function for invalid inputs is undefined.
|
||||
*/
|
||||
function _mb_strlen( $str, $encoding = null ) {
|
||||
if ( null === $encoding ) {
|
||||
$encoding = get_option( 'blog_charset' );
|
||||
}
|
||||
|
||||
// The solution below works only for UTF-8,
|
||||
// so in case of a different charset just use built-in strlen()
|
||||
$charset = get_option( 'blog_charset' );
|
||||
if ( ! in_array( $charset, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
||||
if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
||||
return strlen( $str );
|
||||
}
|
||||
// Use the regex unicode support to separate the UTF-8 characters into an array
|
||||
preg_match_all( '/./us', $str, $match );
|
||||
return count( $match[0] );
|
||||
|
||||
if ( _wp_can_use_pcre_u() ) {
|
||||
// Use the regex unicode support to separate the UTF-8 characters into an array
|
||||
preg_match_all( '/./us', $str, $match );
|
||||
return count( $match[0] );
|
||||
}
|
||||
|
||||
$regex = '/(?:
|
||||
[\x00-\x7F] # single-byte sequences 0xxxxxxx
|
||||
| [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
|
||||
| [\xE1-\xEC][\x80-\xBF]{2}
|
||||
| \xED[\x80-\x9F][\x80-\xBF]
|
||||
| [\xEE-\xEF][\x80-\xBF]{2}
|
||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2}
|
||||
)/x';
|
||||
|
||||
$count = 1; // Start at 1 instead of 0 since the first thing we do is decrement
|
||||
do {
|
||||
// We had some string left over from the last round, but we counted it in that last round.
|
||||
$count--;
|
||||
|
||||
// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
|
||||
$pieces = preg_split( $regex, $str, 1000 );
|
||||
|
||||
// Increment
|
||||
$count += count( $pieces );
|
||||
} while ( $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.
|
||||
|
||||
// Fencepost: preg_split() always returns one extra item in the array
|
||||
return --$count;
|
||||
}
|
||||
|
||||
if ( !function_exists('hash_hmac') ):
|
||||
|
||||
Reference in New Issue
Block a user