mirror of
https://github.com/gosticks/wordpress-develop.git
synced 2026-04-03 12:14:25 +00:00
Shortcodes: Improve the reliability of shortcodes inside HTML tags.
Props miqrogroove. See #15694. git-svn-id: https://develop.svn.wordpress.org/trunk@33359 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
@@ -182,9 +182,10 @@ function has_shortcode( $content, $tag ) {
|
||||
* @global array $shortcode_tags List of shortcode tags and their callback hooks.
|
||||
*
|
||||
* @param string $content Content to search for shortcodes.
|
||||
* @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
|
||||
* @return string Content with shortcodes filtered out.
|
||||
*/
|
||||
function do_shortcode($content) {
|
||||
function do_shortcode( $content, $ignore_html = false ) {
|
||||
global $shortcode_tags;
|
||||
|
||||
if ( false === strpos( $content, '[' ) ) {
|
||||
@@ -194,8 +195,24 @@ function do_shortcode($content) {
|
||||
if (empty($shortcode_tags) || !is_array($shortcode_tags))
|
||||
return $content;
|
||||
|
||||
$tagnames = array_keys($shortcode_tags);
|
||||
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
||||
$pattern = "/\\[($tagregexp)/s";
|
||||
|
||||
if ( 1 !== preg_match( $pattern, $content ) ) {
|
||||
// Avoids parsing HTML when there are no shortcodes or embeds anyway.
|
||||
return $content;
|
||||
}
|
||||
|
||||
$content = do_shortcodes_in_html_tags( $content, $ignore_html );
|
||||
|
||||
$pattern = get_shortcode_regex();
|
||||
return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
|
||||
$content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
|
||||
|
||||
// Always restore square braces so we don't break things like <!--[if IE ]>
|
||||
$content = unescape_invalid_shortcodes( $content );
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -295,6 +312,141 @@ function do_shortcode_tag( $m ) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Search only inside HTML elements for shortcodes and process them.
 *
 * The content is split into HTML elements (including comments) and the text
 * between them. Text between elements is left untouched. Inside each element,
 * shortcodes found in attributes are expanded, and any [ or ] characters that
 * remain are HTML encoded as &#91; / &#93; placeholders to prevent interference
 * with shortcodes that are outside the elements. The placeholders are meant to
 * be removed again by unescape_invalid_shortcodes().
 *
 * Assumes $content processed by KSES already. Users with unfiltered_html
 * capability may get unexpected output if angle braces are nested in tags.
 *
 * @since 4.2.3
 *
 * @param string $content     Content to search for shortcodes.
 * @param bool   $ignore_html When true, all square braces inside elements will be encoded
 *                            and no shortcode inside an element is processed.
 * @return string Content with shortcodes inside HTML elements processed and
 *                leftover square braces inside elements encoded.
 */
function do_shortcodes_in_html_tags( $content, $ignore_html ) {
	// Normalize entities in unfiltered HTML before adding placeholders.
	// Entities that already look like our placeholders are rewritten to the
	// zero-padded form so that unescape_invalid_shortcodes() will not turn
	// them into literal square braces later on.
	$trans   = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
	$content = strtr( $content, $trans );

	// From here on $trans is the placeholder table: literal brace => entity.
	$trans = array( '[' => '&#91;', ']' => '&#93;' );

	$pattern = get_shortcode_regex();

	$comment_regex =
		  '!'           // Start of comment, after the <.
		. '(?:'         // Unroll the loop: Consume everything until --> is found.
		.     '-(?!->)' // Dash not followed by end of comment.
		.     '[^\-]*+' // Consume non-dashes.
		. ')*+'         // Loop possessively.
		. '(?:-->)?';   // End of comment. If not found, match all input.

	$regex =
		  '/('                   // Capture the entire match.
		.     '<'                // Find start of element.
		.     '(?(?=!--)'        // Is this a comment?
		.         $comment_regex // Find end of comment.
		.     '|'
		.         '[^>]*>?'      // Find end of element. If not found, match all input.
		.     ')'
		. ')/s';

	// Elements are kept in the result (DELIM_CAPTURE) so the content can be
	// reassembled with implode() once the elements have been rewritten.
	$textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

	foreach ( $textarr as &$element ) {
		// Only HTML elements are handled here; plain text is left alone.
		if ( '<' !== $element[0] ) {
			continue;
		}

		$noopen  = false === strpos( $element, '[' );
		$noclose = false === strpos( $element, ']' );
		if ( $noopen || $noclose ) {
			// This element does not contain shortcodes.
			if ( $noopen xor $noclose ) {
				// Need to encode stray [ or ] chars.
				$element = strtr( $element, $trans );
			}
			continue;
		}

		if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
			// Encode all [ and ] chars.
			$element = strtr( $element, $trans );
			continue;
		}

		$attributes = wp_kses_attr_parse( $element );
		if ( false === $attributes ) {
			// Looks like we found some crazy unfiltered HTML. Skipping it for sanity.
			$element = strtr( $element, $trans );
			continue;
		}

		// Get element name. The first and last parts of the parsed element are
		// taken off so that only the attributes remain in $attributes.
		$front   = array_shift( $attributes );
		$back    = array_pop( $attributes );
		$matches = array();
		preg_match( '%[a-zA-Z0-9]+%', $front, $matches );
		$elname = $matches[0];

		// Look for shortcodes in each attribute separately.
		foreach ( $attributes as &$attr ) {
			$open  = strpos( $attr, '[' );
			$close = strpos( $attr, ']' );
			if ( false === $open || false === $close ) {
				continue; // Go to next attribute. Square braces will be escaped at end of loop.
			}

			$double = strpos( $attr, '"' );
			$single = strpos( $attr, "'" );
			if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
				// $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
				// In this specific situation we assume KSES did not run because the input
				// was written by an administrator, so we should avoid changing the output
				// and we do not need to run KSES here.
				$attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
			} else {
				// $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
				// We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
				$count    = 0;
				$new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
				if ( $count > 0 ) {
					// Sanitize the shortcode output using KSES.
					$new_attr = wp_kses_one_attr( $new_attr, $elname );
					if ( '' !== $new_attr ) {
						// The shortcode is safe to use now.
						$attr = $new_attr;
					}
					// Otherwise the original attribute is kept and its braces
					// are encoded below, so the shortcode is not expanded.
				}
			}
		}
		// Break the reference so later writes to $attr cannot alter $attributes.
		unset( $attr );

		$element = $front . implode( '', $attributes ) . $back;

		// Now encode any remaining [ or ] chars.
		$element = strtr( $element, $trans );
	}
	// Break the reference so later writes to $element cannot alter $textarr.
	unset( $element );

	$content = implode( '', $textarr );

	return $content;
}
|
||||
|
||||
/**
 * Remove placeholders added by do_shortcodes_in_html_tags().
 *
 * Every &#91; and &#93; entity in the content is converted back to a literal
 * [ or ] character. Zero-padded entities such as &#091; are not touched.
 *
 * @since 4.2.3
 *
 * @param string $content Content to search for placeholders.
 * @return string Content with placeholders removed.
 */
function unescape_invalid_shortcodes( $content ) {
	// Clean up entire string, avoids re-parsing HTML.
	$trans   = array( '&#91;' => '[', '&#93;' => ']' );
	$content = strtr( $content, $trans );

	return $content;
}
|
||||
|
||||
/**
|
||||
* Retrieve all attributes from the shortcodes tag.
|
||||
*
|
||||
@@ -394,9 +546,15 @@ function strip_shortcodes( $content ) {
|
||||
if (empty($shortcode_tags) || !is_array($shortcode_tags))
|
||||
return $content;
|
||||
|
||||
$pattern = get_shortcode_regex();
|
||||
$content = do_shortcodes_in_html_tags( $content, true );
|
||||
|
||||
return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
|
||||
$pattern = get_shortcode_regex();
|
||||
$content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
|
||||
|
||||
// Always restore square braces so we don't break things like <!--[if IE ]>
|
||||
$content = unescape_invalid_shortcodes( $content );
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user