mirror of
https://github.com/gosticks/wordpress-develop.git
synced 2026-07-01 15:50:09 +00:00
Sitemaps: Add XML sitemaps functionality to WordPress.
While web crawlers are able to discover pages from links within the site and from other sites, XML sitemaps supplement this approach by allowing crawlers to quickly and comprehensively identify all URLs included in the sitemap and learn other signals about those URLs using the associated metadata. See https://make.wordpress.org/core/2020/06/10/merge-announcement-extensible-core-sitemaps/ for more details. This feature exposes the sitemap index via `/wp-sitemap.xml` and exposes a variety of new filters and hooks for developers to modify the behavior. Users can disable sitemaps completely by turning off search engine visibility in WordPress admin. This change also introduces a new `esc_xml()` function to escape strings for output in XML, as well as XML support to `wp_kses_normalize_entities()`. Props Adrian McShane, afragen, adamsilverstein, casiepa, flixos90, garrett-eclipse, joemcgill, kburgoine, kraftbj, milana_cap, pacifika, pbiron, pfefferle, Ruxandra Gradina, swissspidy, szepeviktor, tangrufus, tweetythierry. Fixes #50117. See #3670. See #19998. git-svn-id: https://develop.svn.wordpress.org/trunk@48072 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
@@ -935,6 +935,7 @@ function seems_utf8( $str ) {
|
||||
* ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
|
||||
*
|
||||
* @since 1.2.2
|
||||
* @since 5.5.0 `$quote_style` also accepts `ENT_XML1`.
|
||||
* @access private
|
||||
*
|
||||
* @staticvar string $_charset
|
||||
@@ -942,7 +943,10 @@ function seems_utf8( $str ) {
|
||||
* @param string $string The text which is to be encoded.
|
||||
* @param int|string $quote_style Optional. Converts double quotes if set to ENT_COMPAT,
|
||||
* both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES.
|
||||
* Also compatible with old values; converting single quotes if set to 'single',
|
||||
* Converts single and double quotes, as well as converting HTML
|
||||
* named entities (that are not also XML named entities) to their
|
||||
* code points if set to ENT_XML1. Also compatible with old values;
|
||||
* converting single quotes if set to 'single',
|
||||
* double if set to 'double' or both if otherwise set.
|
||||
* Default is ENT_NOQUOTES.
|
||||
* @param false|string $charset Optional. The character encoding of the string. Default is false.
|
||||
@@ -964,7 +968,9 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
|
||||
// Account for the previous behaviour of the function when the $quote_style is not an accepted value.
|
||||
if ( empty( $quote_style ) ) {
|
||||
$quote_style = ENT_NOQUOTES;
|
||||
} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
|
||||
} elseif ( ENT_XML1 === $quote_style ) {
|
||||
$quote_style = ENT_QUOTES | ENT_XML1;
|
||||
} elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) {
|
||||
$quote_style = ENT_QUOTES;
|
||||
}
|
||||
|
||||
@@ -994,7 +1000,7 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
|
||||
if ( ! $double_encode ) {
|
||||
// Guarantee every &entity; is valid, convert &garbage; into &amp;garbage;
|
||||
// This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable.
|
||||
$string = wp_kses_normalize_entities( $string );
|
||||
$string = wp_kses_normalize_entities( $string, ( $quote_style & ENT_XML1 ) ? 'xml' : 'html' );
|
||||
}
|
||||
|
||||
$string = htmlspecialchars( $string, $quote_style, $charset, $double_encode );
|
||||
@@ -4536,6 +4542,63 @@ function esc_textarea( $text ) {
|
||||
return apply_filters( 'esc_textarea', $safe_text, $text );
|
||||
}
|
||||
|
||||
/**
 * Escaping for XML blocks.
 *
 * The text is first passed through wp_check_invalid_utf8(). It is then split
 * into CDATA Sections and the text around them: CDATA Sections are returned
 * unchanged, everything else is escaped with _wp_specialchars() using the
 * ENT_XML1 quote style. The result is passed through the {@see 'esc_xml'}
 * filter before being returned.
 *
 * @since 5.5.0
 *
 * @param string $text Text to escape.
 * @return string Escaped text.
 */
function esc_xml( $text ) {
	$safe_text = wp_check_invalid_utf8( $text );

	$cdata_regex = '\<\!\[CDATA\[.*?\]\]\>';
	$regex       = <<<EOF
/
	(?=.*?{$cdata_regex})                 # lookahead that will match anything followed by a CDATA Section
	(?<non_cdata_followed_by_cdata>(.*?)) # the "anything" matched by the lookahead
	(?<cdata>({$cdata_regex}))            # the CDATA Section matched by the lookahead

|	                                      # alternative

	(?<non_cdata>(.*))                    # non-CDATA Section
/sx
EOF;

	// The (string) cast turns a null result (PCRE failure) into an empty string.
	$safe_text = (string) preg_replace_callback(
		$regex,
		static function( $matches ) {
			/*
			 * The second alternative can also match the empty string (for
			 * instance at the very end of the subject). Compare against ''
			 * explicitly: a loose truthiness check would also discard the
			 * literal text "0".
			 */
			if ( ! isset( $matches[0] ) || '' === $matches[0] ) {
				return '';
			}

			/*
			 * 'non_cdata' only carries text when the second alternative matched.
			 * Do not use empty() here, as it treats "0" as empty.
			 */
			if ( isset( $matches['non_cdata'] ) && '' !== $matches['non_cdata'] ) {
				// Escape HTML entities in the non-CDATA Section.
				return _wp_specialchars( $matches['non_cdata'], ENT_XML1 );
			}

			// Return the CDATA Section unchanged, escape HTML entities in the rest.
			return _wp_specialchars( $matches['non_cdata_followed_by_cdata'], ENT_XML1 ) . $matches['cdata'];
		},
		$safe_text
	);

	/**
	 * Filters a string cleaned and escaped for output in XML.
	 *
	 * Text passed to esc_xml() is stripped of invalid or special characters
	 * before output. HTML named character references are converted to their
	 * equivalent code points.
	 *
	 * @since 5.5.0
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'esc_xml', $safe_text, $text );
}
|
||||
|
||||
/**
|
||||
* Escape an HTML tag name.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user