Sitemaps: Add XML sitemaps functionality to WordPress.

While web crawlers are able to discover pages from links within the site and from other sites, XML sitemaps supplement this approach by allowing crawlers to quickly and comprehensively identify all URLs included in the sitemap and learn other signals about those URLs using the associated metadata.

See https://make.wordpress.org/core/2020/06/10/merge-announcement-extensible-core-sitemaps/ for more details.

This feature exposes the sitemap index via `/wp-sitemap.xml` and exposes a variety of new filters and hooks for developers to modify the behavior. Users can disable sitemaps completely by turning off search engine visibility in WordPress admin.

This change also introduces a new `esc_xml()` function to escape strings for output in XML, as well as XML support to `wp_kses_normalize_entities()`.

Props Adrian McShane, afragen, adamsilverstein, casiepa, flixos90, garrett-eclipse, joemcgill, kburgoine, kraftbj, milana_cap, pacifika, pbiron, pfefferle, Ruxandra Gradina, swissspidy, szepeviktor, tangrufus, tweetythierry.
Fixes #50117.
See #3670. See #19998.


git-svn-id: https://develop.svn.wordpress.org/trunk@48072 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Pascal Birchler
2020-06-17 15:22:49 +00:00
parent 92456a8d4b
commit b58973554d
32 changed files with 3598 additions and 8 deletions

View File

@@ -153,12 +153,15 @@ require __DIR__ . '/testcase-rest-post-type-controller.php';
require __DIR__ . '/testcase-xmlrpc.php';
require __DIR__ . '/testcase-ajax.php';
require __DIR__ . '/testcase-canonical.php';
require __DIR__ . '/testcase-xml.php';
require __DIR__ . '/exceptions.php';
require __DIR__ . '/utils.php';
require __DIR__ . '/spy-rest-server.php';
require __DIR__ . '/class-wp-rest-test-search-handler.php';
require __DIR__ . '/class-wp-rest-test-configurable-controller.php';
require __DIR__ . '/class-wp-fake-block-type.php';
require __DIR__ . '/class-wp-sitemaps-test-provider.php';
require __DIR__ . '/class-wp-sitemaps-empty-test-provider.php';
/**
* A class to handle additional command line arguments passed to the script.

View File

@@ -0,0 +1,38 @@
<?php
/**
 * Class WP_Sitemaps_Empty_Test_Provider.
 *
 * A sitemap provider stub for tests that never has any content:
 * it reports zero pages and returns an empty URL list.
 */
class WP_Sitemaps_Empty_Test_Provider extends WP_Sitemaps_Provider {
	/**
	 * Sets the object type this provider reports.
	 *
	 * @param string $object_type Optional. Object type name to use. Default 'test'.
	 */
	public function __construct( $object_type = 'test' ) {
		$this->object_type = $object_type;
	}

	/**
	 * Returns the URL list for a sitemap page, which is always empty for this provider.
	 *
	 * @param int    $page_num       Page of results. Ignored.
	 * @param string $object_subtype Optional. Object subtype name. Ignored. Default empty.
	 * @return array Always an empty array.
	 */
	public function get_url_list( $page_num, $object_subtype = '' ) {
		// Intentionally empty, regardless of the requested page or subtype.
		$url_list = array();

		return $url_list;
	}

	/**
	 * Returns the number of sitemap pages, which is always zero for this provider.
	 *
	 * @param string $object_subtype Optional. Object subtype. Ignored. Default empty.
	 * @return int Always 0.
	 */
	public function get_max_num_pages( $object_subtype = '' ) {
		$max_num_pages = 0;

		return $max_num_pages;
	}
}

View File

@@ -0,0 +1,52 @@
<?php
/**
 * Class WP_Sitemaps_Test_Provider.
 *
 * A sitemap provider stub for tests: it exposes three fixed object subtypes,
 * reports four pages and returns an empty URL list.
 */
class WP_Sitemaps_Test_Provider extends WP_Sitemaps_Provider {
	/**
	 * WP_Sitemaps_Test_Provider constructor.
	 *
	 * @param string $object_type Optional. Object type name to use. Default 'test'.
	 */
	public function __construct( $object_type = 'test' ) {
		$this->object_type = $object_type;
	}

	/**
	 * Returns the fixed set of object subtypes exposed by this test provider.
	 *
	 * Each subtype is a plain object with a single `name` property, keyed by that name.
	 *
	 * @return array Map of object subtype objects keyed by their name
	 *               ('type-1', 'type-2' and 'type-3').
	 */
	public function get_object_subtypes() {
		$subtypes = array();

		foreach ( array( 'type-1', 'type-2', 'type-3' ) as $subtype_name ) {
			$subtypes[ $subtype_name ] = (object) array( 'name' => $subtype_name );
		}

		return $subtypes;
	}

	/**
	 * Returns the URL list for a sitemap page, which is always empty for this provider.
	 *
	 * @param int    $page_num       Page of results. Ignored.
	 * @param string $object_subtype Optional. Object subtype name. Ignored. Default empty.
	 * @return array Always an empty array.
	 */
	public function get_url_list( $page_num, $object_subtype = '' ) {
		$url_list = array();

		return $url_list;
	}

	/**
	 * Returns the number of sitemap pages, which is a fixed value for this provider.
	 *
	 * @param string $object_subtype Optional. Object subtype. Ignored. Default empty.
	 * @return int Always 4.
	 */
	public function get_max_num_pages( $object_subtype = '' ) {
		$max_num_pages = 4;

		return $max_num_pages;
	}
}

View File

@@ -0,0 +1,76 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!--
Normalize an XML document to make it easier to compare whether 2 documents will
be seen as "equal" to an XML processor.
The normalization is similar, in spirit, to {@link https://www.w3.org/TR/xml-c14n11/ Canonical XML},
but without some aspects of C14N that make the kinds of assertions we need difficult.
For example, the following XML documents will be interpreted the same by an XML processor,
even though a string comparison of them would show differences:
<root xmlns='urn:example'>
<ns0:child xmlns:ns0='urn:another-example'>this is a test</ns0:child>
</root>
<ns0:root xmlns:ns0='urn:example'>
<child xmlns='urn:another-example'>this is a test</child>
</ns0:root>
-->
<xsl:transform
xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
version='1.0'
>
<!--
Output UTF-8 XML, no indentation and all CDATA sections replaced with their character content.
-->
<xsl:output
method='xml'
indent='no'
cdata-section-elements=''
encoding='UTF-8' />
<!--
Strip insignificant white space.
-->
<xsl:strip-space elements='*' />
<!--
Normalize elements by not relying on the prefix used in the input document
and ordering attributes first by namespace-uri and then by local-name.
Each element is re-created from its local name and namespace URI only.
The explicit priority makes this template win over the generic node() template
at the end of this stylesheet, which also matches elements.
-->
<xsl:template match='*' priority='10'>
<xsl:element name='{local-name()}' namespace='{namespace-uri()}'>
<xsl:apply-templates select='@*'>
<xsl:sort select='namespace-uri()' />
<xsl:sort select='local-name()' />
</xsl:apply-templates>
<xsl:apply-templates select='node()' />
</xsl:element>
</xsl:template>
<!--
Normalize attributes by not relying on the prefix used in the input document.
Each attribute is re-created from its local name and namespace URI, with its value copied as text.
-->
<xsl:template match='@*'>
<xsl:attribute name='{local-name()}' namespace='{namespace-uri()}'>
<xsl:value-of select='.' />
</xsl:attribute>
</xsl:template>
<!--
Strip comments.
The template is empty, so matched comments produce no output. The explicit priority
makes it win over the generic node() template below, which also matches comments.
-->
<xsl:template match='comment()' priority='10' />
<!--
Pass all other nodes through unchanged.
-->
<xsl:template match='node()'>
<xsl:copy>
<xsl:apply-templates select='node()' />
</xsl:copy>
</xsl:template>
</xsl:transform>

View File

@@ -0,0 +1,92 @@
<?php
/**
 * Base test case for tests that need to load, normalize and compare XML documents.
 */
abstract class WP_Test_XML_TestCase extends WP_UnitTestCase {
	/**
	 * Load XML from a string.
	 *
	 * Fails the current test if libxml reports an error while parsing `$xml`.
	 *
	 * @param string $xml
	 * @param int    $options Bitwise OR of the {@link https://www.php.net/manual/en/libxml.constants.php libxml option constants}.
	 *                        Default is 0.
	 * @return DOMDocument The DOMDocument object loaded from the XML.
	 */
	public function loadXML( $xml, $options = 0 ) {
		// Suppress PHP warnings generated by DOMDocument::loadXML(), which would cause
		// PHPUnit to incorrectly report an error instead of just a failure.
		$internal = libxml_use_internal_errors( true );
		libxml_clear_errors();

		$xml_dom = new DOMDocument();
		$xml_dom->loadXML( $xml, $options );
		$libxml_last_error = libxml_get_last_error();

		// Restore the previous libxml error handling BEFORE asserting: a failed assertion
		// throws, and restoring afterwards would leave internal error collection enabled
		// for every test that runs later in the same process.
		libxml_clear_errors();
		libxml_use_internal_errors( $internal );

		// libxml_get_last_error() returns false when there was no error, in which case
		// isset() on the property is simply false.
		$this->assertFalse(
			isset( $libxml_last_error->message ),
			isset( $libxml_last_error->message ) ? sprintf( 'Non-well-formed XML: %s.', $libxml_last_error->message ) : ''
		);

		return $xml_dom;
	}

	/**
	 * Normalize an XML document to make comparing two documents easier.
	 *
	 * The document is transformed with the `normalize-xml.xsl` stylesheet located next to
	 * this file. The test is marked as skipped when the XSL extension is not available.
	 *
	 * @param string $xml
	 * @param int    $options Bitwise OR of the {@link https://www.php.net/manual/en/libxml.constants.php libxml option constants}.
	 *                        Default is 0.
	 * @return string The normalized form of `$xml`.
	 */
	public function normalizeXML( $xml, $options = 0 ) {
		if ( ! class_exists( 'XSLTProcessor' ) ) {
			$this->markTestSkipped( 'This test requires the XSL extension.' );
		}

		// The processor is created once and reused by every subsequent call.
		static $xslt_proc;

		if ( ! $xslt_proc ) {
			$xslt_proc = new XSLTProcessor();
			$xslt_proc->importStyleSheet( simplexml_load_file( __DIR__ . '/normalize-xml.xsl' ) );
		}

		return $xslt_proc->transformToXML( $this->loadXML( $xml, $options ) );
	}

	/**
	 * Reports an error identified by `$message` if the namespace normalized form of the XML document in `$actualXml`
	 * is not equal to the namespace normalized form of the XML document in `$expectedXml`.
	 *
	 * This is similar to {@link https://phpunit.de/manual/6.5/en/appendixes.assertions.html#appendixes.assertions.assertXmlStringEqualsXmlString assertXmlStringEqualsXmlString()}
	 * except that differences in namespace prefixes are normalized away, such that given
	 * `$actualXml = "<root xmlns='urn:wordpress.org'><child/></root>";` and
	 * `$expectedXml = "<ns0:root xmlns:ns0='urn:wordpress.org'><ns0:child/></ns0:root>";`
	 * then `$this->assertXMLEquals( $expectedXml, $actualXml )` will succeed.
	 *
	 * @param string $expectedXml
	 * @param string $actualXml
	 * @param string $message   Optional. Message to display when the assertion fails.
	 */
	public function assertXMLEquals( $expectedXml, $actualXml, $message = '' ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase
		$this->assertEquals( $this->normalizeXML( $expectedXml ), $this->normalizeXML( $actualXml ), $message ); // phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase
	}

	/**
	 * Reports an error identified by `$message` if the namespace normalized form of the XML document in `$actualXml`
	 * is equal to the namespace normalized form of the XML document in `$expectedXml`.
	 *
	 * This is similar to {@link https://phpunit.de/manual/6.5/en/appendixes.assertions.html#appendixes.assertions.assertXmlStringEqualsXmlString assertXmlStringNotEqualsXmlString()}
	 * except that differences in namespace prefixes are normalized away, such that given
	 * `$actualXml = "<root xmlns='urn:wordpress.org'><child/></root>";` and
	 * `$expectedXml = "<ns0:root xmlns:ns0='urn:wordpress.org'><ns0:child/></ns0:root>";`
	 * then `$this->assertXMLNotEquals( $expectedXml, $actualXml )` will fail.
	 *
	 * @param string $expectedXml
	 * @param string $actualXml
	 * @param string $message   Optional. Message to display when the assertion fails.
	 */
	public function assertXMLNotEquals( $expectedXml, $actualXml, $message = '' ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase
		$this->assertNotEquals( $this->normalizeXML( $expectedXml ), $this->normalizeXML( $actualXml ), $message ); // phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase
	}
}