diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index 6bf91277b0..457cd627ea 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -219,43 +219,8 @@ function wptexturize( $text, $reset = false ) { preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches ); $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); $found_shortcodes = ! empty( $tagnames ); - if ( $found_shortcodes ) { - $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); - $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). - $shortcode_regex = - '\[' // Find start of shortcode. - . '[\/\[]?' // Shortcodes may begin with [/ or [[ - . $tagregexp // Only match registered shortcodes, because performance. - . '(?:' - . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. - . '|' - . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. - . ')*+' // Possessive critical. - . '\]' // Find end of shortcode. - . '\]?'; // Shortcodes may end with ]] - } - - $comment_regex = - '!' // Start of comment, after the <. - . '(?:' // Unroll the loop: Consume everything until --> is found. - . '-(?!->)' // Dash not followed by end of comment. - . '[^\-]*+' // Consume non-dashes. - . ')*+' // Loop possessively. - . '(?:-->)?'; // End of comment. If not found, match all input. - - $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. - '<' // Find start of element. - . '(?(?=!--)' // Is this a comment? - . $comment_regex // Find end of comment. - . '|' - . '[^>]*>?' // Find end of element. If not found, match all input. - . ')'; - - if ( $found_shortcodes ) { - $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/s'; - } else { - $regex = '/(' . $html_regex . ')/s'; - } + $shortcode_regex = $found_shortcodes ? 
/**
 * Retrieve the regular expression wptexturize() uses to split text into
 * HTML elements/comments and, optionally, registered shortcodes.
 *
 * NOTE(review): the header and opening lines of this function were lost when
 * the patch was flattened. The signature below is inferred from its call
 * sites (it is called with one argument and with none) -- confirm against the
 * real file before merging.
 *
 * @access private
 * @ignore
 *
 * @param string $shortcode_regex Optional. Shortcode pattern to match in addition to HTML. Default ''.
 * @return string The regular expression, delimited with '/'.
 */
function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
	$comment_regex =
		  '!'           // Start of comment, after the <.
		. '(?:'         // Unroll the loop: Consume everything until --> is found.
		.     '-(?!->)' // Dash not followed by end of comment.
		.     '[^\-]*+' // Consume non-dashes.
		. ')*+'         // Loop possessively.
		. '(?:-->)?';   // End of comment. If not found, match all input.

	$html_regex =            // Needs to be replaced with wp_html_split() per Shortcode API Roadmap.
		  '<'                // Find start of element.
		. '(?(?=!--)'        // Is this a comment?
		.     $comment_regex // Find end of comment.
		. '|'
		.     '[^>]*>?'      // Find end of element. If not found, match all input.
		. ')';

	// Without a shortcode pattern, split on HTML only.
	if ( empty( $shortcode_regex ) ) {
		$regex = '/(' . $html_regex . ')/';
	} else {
		$regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/';
	}

	return $regex;
}

/**
 * Retrieve the regular expression for shortcodes.
 *
 * The returned pattern has no delimiters; callers wrap it in '/'. Tag names
 * are therefore quoted with '/' as the delimiter so a tag containing a slash
 * cannot terminate the pattern early.
 *
 * @access private
 * @ignore
 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
 * @since 4.4.0
 *
 * @param array $tagnames List of shortcodes to find.
 * @return string The regular expression, or an empty string when no tag names are given.
 */
function _get_wptexturize_shortcode_regex( $tagnames ) {
	// An empty alternation "(?:)" would match any bracketed text that starts
	// with whitespace, "]" or "/", so report "no shortcode pattern" instead.
	if ( empty( $tagnames ) ) {
		return '';
	}

	$quoted = array();
	foreach ( $tagnames as $tagname ) {
		$quoted[] = preg_quote( $tagname, '/' );
	}

	$tagregexp = join( '|', $quoted );
	$tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex().

	$regex =
		  '\['              // Find start of shortcode.
		. '[\/\[]?'         // Shortcodes may begin with [/ or [[
		. $tagregexp        // Only match registered shortcodes, because performance.
		. '(?:'
		.     '[^\[\]<>]+'  // Shortcodes do not contain other shortcodes. Quantifier critical.
		. '|'
		.     '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
		. ')*+'             // Possessive critical.
		. '\]'              // Find end of shortcode.
		. '\]?';            // Shortcodes may end with ]]

	return $regex;
}
Lorem ipsumLorem ipsum
RequirementsMinimumsStatusMinimumsStatus
AgeLorem ipsumYesLorem ipsumYes
TotalLorem ipsum100%Lorem ipsum100%
EducationLorem ipsumYesn/a
ConcentrationLorem ipsumYesn/a
CertificationLorem ipsumNon/a
Recognized CourseworkLorem ipsumTBDn/a
Lorem ipsumLorem ipsumYesn/a
Lorem ipsumLorem ipsumYesn/a
Lorem ipsumLorem ipsumYesn/a
Lorem ipsumLorem ipsumYesn/a
Lorem ipsumLorem ipsum80%200 hours80%
Lorem ipsumLorem ipsumby credit100 hoursby credit
Lorem ipsumLorem ipsum60%50 hours60%
Lorem ipsumLorem ipsumby credit75 hoursby credit
Lorem ipsumLorem ipsumYes250 hoursYes
Lorem ipsumLorem ipsumYes100 hoursYes
Lorem ipsumLorem ipsumYes25 hoursYes
Lorem ipsumEffective Aug 2014n/an/a
Lorem ipsumLorem ipsumDec \'14Dec \'14
Lorem ipsumLorem ipsumYes2nd ClassYes
/**
 * Determine approximate backtrack count when running PCRE.
 *
 * Runs the pattern repeatedly while doubling `pcre.backtrack_limit`, starting
 * at 4, and reports the first limit at which PCRE finishes without error. The
 * ini setting is restored to its previous value after every attempt.
 *
 * @param string $pattern  Regular expression, including delimiters.
 * @param string $subject  Text to run the pattern against.
 * @param string $strategy Which PCRE function to exercise: 'split', 'match' or 'match_all'.
 * @return int The first limit tried that produced no PCRE error, or the first
 *             doubled value above the ceiling when no attempt succeeded.
 *
 * @throws InvalidArgumentException When $strategy is not one of the supported values.
 */
function benchmark_pcre_backtracking( $pattern, $subject, $strategy ) {
	// Without this guard an unknown strategy would run no PCRE call at all and
	// the result would reflect whatever preg_last_error() was left at earlier.
	if ( ! in_array( $strategy, array( 'split', 'match', 'match_all' ), true ) ) {
		throw new InvalidArgumentException( 'Unknown PCRE benchmark strategy.' );
	}

	$saved_config = ini_get( 'pcre.backtrack_limit' );

	// Attempt to prevent PHP crashes.  Adjust these lower when needed.
	if ( version_compare( phpversion(), '5.4.8', '>' ) ) {
		$limit = 1000000;
	} else {
		$limit = 20000; // 20,000 is a reasonable upper limit, but see also https://core.trac.wordpress.org/ticket/29557#comment:10
	}

	// Start with small numbers, so if a crash is encountered at higher numbers we can still debug the problem.
	for ( $i = 4; $i <= $limit; $i *= 2 ) {

		ini_set( 'pcre.backtrack_limit', $i );

		switch ( $strategy ) {
			case 'split':
				preg_split( $pattern, $subject );
				break;
			case 'match':
				preg_match( $pattern, $subject );
				break;
			case 'match_all':
				preg_match_all( $pattern, $subject );
				break;
		}

		ini_set( 'pcre.backtrack_limit', $saved_config );

		switch ( preg_last_error() ) {
			case PREG_NO_ERROR:
				return $i;
			case PREG_BACKTRACK_LIMIT_ERROR:
				// "continue 2" targets the for loop. A bare "continue" inside a
				// switch acts like "break" and raises a warning on PHP 7.3+.
				continue 2;
			case PREG_RECURSION_LIMIT_ERROR:
				trigger_error( 'PCRE recursion limit encountered before backtrack limit.' );
				break; // Leaves the switch only; the loop moves on to the next limit.
			case PREG_BAD_UTF8_ERROR:
				trigger_error( 'UTF-8 error during PCRE benchmark.' );
				break;
			case PREG_INTERNAL_ERROR:
				trigger_error( 'Internal error during PCRE benchmark.' );
				break;
			default:
				trigger_error( 'Unexpected error during PCRE benchmark.' );
		}
	}

	// No attempt succeeded: $i is now the first doubled value above $limit.
	return $i;
}
/**
 * Automated performance testing of the main regex.
 *
 * From tests/phpunit/tests/formatting/WPTexturize.php: benchmarks the
 * wptexturize() split pattern, first for HTML only and then with every
 * registered shortcode included.
 *
 * @dataProvider data_whole_posts
 */
function test_pcre_performance( $input ) {
	global $shortcode_tags;

	// With Shortcodes Disabled
	$html_only_regex = _get_wptexturize_split_regex();
	$html_only_cost  = benchmark_pcre_backtracking( $html_only_regex, $input, 'split' );
	$this->assertLessThan( 200, $html_only_cost );

	// With Shortcodes Enabled
	$registered_tags      = array_keys( $shortcode_tags );
	$with_shortcode_regex = _get_wptexturize_split_regex( _get_wptexturize_shortcode_regex( $registered_tags ) );
	$with_shortcode_cost  = benchmark_pcre_backtracking( $with_shortcode_regex, $input, 'split' );

	return $this->assertLessThan( 200, $with_shortcode_cost );
}

/**
 * Data provider: loads the whole-post fixtures and returns what the
 * fixture file's own data_whole_posts() function yields.
 */
function data_whole_posts() {
	require_once( DIR_TESTDATA . '/formatting/whole-posts.php' );
	return data_whole_posts();
}

/**
 * Automated performance testing of the main regex.
 *
 * From tests/phpunit/tests/shortcode.php: benchmarks the full shortcode
 * pattern with preg_match_all().
 *
 * @dataProvider data_whole_posts
 */
function test_pcre_performance( $input ) {
	$pattern    = '/' . get_shortcode_regex() . '/';
	$backtracks = benchmark_pcre_backtracking( $pattern, $input, 'match_all' );

	return $this->assertLessThan( 200, $backtracks );
}

/**
 * Data provider: loads the whole-post fixtures and returns what the
 * fixture file's own data_whole_posts() function yields.
 */
function data_whole_posts() {
	require_once( DIR_TESTDATA . '/formatting/whole-posts.php' );
	return data_whole_posts();
}