From 176a1f53f04cde92e7297b5214c03beb9e2ba5c8 Mon Sep 17 00:00:00 2001 From: Felix Arntz Date: Thu, 21 Jan 2021 01:35:16 +0000 Subject: [PATCH] Robots: Introduce Robots API. This changeset introduces a filter-based Robots API, providing central control over the `robots` meta tag. * Introduces `wp_robots()` function which should be called anywhere a `robots` meta tag should be included. * Introduces `wp_robots` filter which allows adding or modifying directives for the `robots` meta tag. The `wp_robots()` function is entirely filter-based, i.e. if no filter is added to `wp_robots`, no directives will be present, and therefore the entire `robots` meta tag will be omitted. * Introduces the following `wp_robots` filter functions which replace similar existing functions that were manually rendering a `robots` meta tag: * `wp_robots_noindex()` replaces `noindex()`, which has been deprecated. * `wp_robots_no_robots()` replaces `wp_no_robots()`, which has been deprecated. * `wp_robots_sensitive_page()` replaces `wp_sensitive_page_meta()`, which has been deprecated. Its rendering of the `referrer` meta tag has been moved to another new function `wp_strict_cross_origin_referrer()`. Migration to the new functions is straightforward. For example, a call to `add_action( 'wp_head', 'wp_no_robots' )` should be replaced with `add_filter( 'wp_robots', 'wp_robots_no_robots' )`. Plugins and themes that render their own `robots` meta tags are encouraged to switch to rely on the `wp_robots` filter in order to use the central management layer now provided by WordPress core. Props adamsilverstein, flixos90, timothyblynjacobs, westonruter. See #51511. git-svn-id: https://develop.svn.wordpress.org/trunk@49992 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-activate.php | 3 +- .../class-wp-customize-manager.php | 2 +- src/wp-includes/default-filters.php | 11 +- src/wp-includes/deprecated.php | 68 ++++++ src/wp-includes/embed.php | 20 ++ src/wp-includes/functions.php | 8 +- src/wp-includes/general-template.php | 50 +--- src/wp-includes/robots-template.php | 135 +++++++++++ src/wp-login.php | 3 +- src/wp-settings.php | 1 + src/wp-signup.php | 2 +- tests/phpunit/tests/customize/manager.php | 2 +- tests/phpunit/tests/general/template.php | 15 -- tests/phpunit/tests/robots.php | 213 ++++++++++++++++++ 14 files changed, 461 insertions(+), 72 deletions(-) create mode 100644 src/wp-includes/robots-template.php create mode 100644 tests/phpunit/tests/robots.php diff --git a/src/wp-activate.php b/src/wp-activate.php index 65a07d00b9..ff71779036 100644 --- a/src/wp-activate.php +++ b/src/wp-activate.php @@ -114,7 +114,8 @@ function wpmu_activate_stylesheet() { \n"; + return; + } + + echo "\n"; +} + +/** + * Display a noindex,noarchive meta tag and referrer origin-when-cross-origin meta tag. + * + * Outputs a noindex,noarchive meta tag that tells web robots not to index or cache the page content. + * Outputs a referrer origin-when-cross-origin meta tag that tells the browser not to send the full + * url as a referrer to other sites when cross-origin assets are loaded. + * + * Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_sensitive_page_meta' ); + * + * @since 5.0.1 + * @deprecated 5.7.0 Use wp_robots_sensitive_page() instead on 'wp_robots' filter + * and wp_strict_cross_origin_referrer() on 'wp_head' action. + */ +function wp_sensitive_page_meta() { + _deprecated_function( __FUNCTION__, '5.7.0', 'wp_robots_sensitive_page()' ); + + ?> + + " /> <?php echo $title; ?> diff --git a/src/wp-includes/general-template.php b/src/wp-includes/general-template.php index 2fbfc4c3c2..e2833129b0 100644 --- a/src/wp-includes/general-template.php +++ b/src/wp-includes/general-template.php @@ -3191,59 +3191,17 @@ function wlwmanifest_link() { } /** - * Displays a noindex meta tag if required by the blog configuration. + * Displays a referrer strict-origin-when-cross-origin meta tag. * - * If a blog is marked as not being public then the noindex meta tag will be - * output to tell web robots not to index the page content. Add this to the - * {@see 'wp_head'} action. - * - * Typical usage is as a {@see 'wp_head'} callback: - * - * add_action( 'wp_head', 'noindex' ); - * - * @see wp_no_robots() - * - * @since 2.1.0 - */ -function noindex() { - // If the blog is not public, tell robots to go away. - if ( '0' == get_option( 'blog_public' ) ) { - wp_no_robots(); - } -} - -/** - * Display a noindex meta tag. - * - * Outputs a noindex meta tag that tells web robots not to index the page content. - * Typical usage is as a {@see 'wp_head'} callback. add_action( 'wp_head', 'wp_no_robots' ); - * - * @since 3.3.0 - * @since 5.3.0 Echo "noindex,nofollow" if search engine visibility is discouraged. - */ -function wp_no_robots() { - if ( get_option( 'blog_public' ) ) { - echo "\n"; - return; - } - - echo "\n"; -} - -/** - * Display a noindex,noarchive meta tag and referrer origin-when-cross-origin meta tag. - * - * Outputs a noindex,noarchive meta tag that tells web robots not to index or cache the page content. * Outputs a referrer origin-when-cross-origin meta tag that tells the browser not to send the full * url as a referrer to other sites when cross-origin assets are loaded. * - * Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_sensitive_page_meta' ); + * Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_strict_cross_origin_referrer' ); * - * @since 5.0.1 + * @since 5.7.0 */ -function wp_sensitive_page_meta() { +function wp_strict_cross_origin_referrer() { ?> - $value ) { + if ( is_string( $value ) ) { + // If a string value, include it as value for the directive. + $robots_strings[] = "{$directive}:{$value}"; + } elseif ( $value ) { + // Otherwise, include the directive if it is truthy. + $robots_strings[] = $directive; + } + } + + if ( empty( $robots_strings ) ) { + return; + } + + echo "\n"; +} + +/** + * Adds noindex to the robots meta tag if required by the site configuration. + * + * If a blog is marked as not being public then noindex will be output to + * tell web robots not to index the page content. Add this to the + * {@see 'wp_robots'} filter. + * + * Typical usage is as a {@see 'wp_robots'} callback: + * + * add_filter( 'wp_robots', 'wp_robots_noindex' ); + * + * @since 5.7.0 + * @see wp_robots_no_robots() + * + * @param array $robots Associative array of robots directives. + * @return array Filtered robots directives. + */ +function wp_robots_noindex( array $robots ) { + if ( ! get_option( 'blog_public' ) ) { + return wp_robots_no_robots( $robots ); + } + + return $robots; +} + +/** + * Adds noindex to the robots meta tag. + * + * This directive tells web robots not to index the page content. + * + * Typical usage is as a {@see 'wp_robots'} callback: + * + * add_filter( 'wp_robots', 'wp_robots_no_robots' ); + * + * @since 5.7.0 + * + * @param array $robots Associative array of robots directives. + * @return array Filtered robots directives. + */ +function wp_robots_no_robots( array $robots ) { + $robots['noindex'] = true; + + if ( get_option( 'blog_public' ) ) { + $robots['follow'] = true; + } else { + $robots['nofollow'] = true; + } + + return $robots; +} + +/** + * Adds noindex and noarchive to the robots meta tag. + * + * This directive tells web robots not to index or archive the page content and + * is recommended to be used for sensitive pages. + * + * Typical usage is as a {@see 'wp_robots'} callback: + * + * add_filter( 'wp_robots', 'wp_robots_sensitive_page' ); + * + * @since 5.7.0 + * + * @param array $robots Associative array of robots directives. + * @return array Filtered robots directives. + */ +function wp_robots_sensitive_page( array $robots ) { + $robots['noindex'] = true; + $robots['noarchive'] = true; + return $robots; +} diff --git a/src/wp-login.php b/src/wp-login.php index 700901f857..915020f717 100644 --- a/src/wp-login.php +++ b/src/wp-login.php @@ -42,7 +42,8 @@ function login_header( $title = 'Log In', $message = '', $wp_error = null ) { global $error, $interim_login, $action; // Don't index any of these forms. - add_action( 'login_head', 'wp_sensitive_page_meta' ); + add_filter( 'wp_robots', 'wp_robots_sensitive_page' ); + add_action( 'login_head', 'wp_strict_cross_origin_referrer' ); add_action( 'login_head', 'wp_login_viewport_meta' ); diff --git a/src/wp-settings.php b/src/wp-settings.php index 63c59c6cef..c5c3a56ee9 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -181,6 +181,7 @@ require ABSPATH . WPINC . '/class-wp-metadata-lazyloader.php'; require ABSPATH . WPINC . '/general-template.php'; require ABSPATH . WPINC . '/link-template.php'; require ABSPATH . WPINC . '/author-template.php'; +require ABSPATH . WPINC . '/robots-template.php'; require ABSPATH . WPINC . '/post.php'; require ABSPATH . WPINC . '/class-walker-page.php'; require ABSPATH . WPINC . '/class-walker-page-dropdown.php'; diff --git a/src/wp-signup.php b/src/wp-signup.php index 8dacd322b1..8ac5004e0e 100644 --- a/src/wp-signup.php +++ b/src/wp-signup.php @@ -3,7 +3,7 @@ /** Sets up the WordPress Environment. */ require __DIR__ . '/wp-load.php'; -add_action( 'wp_head', 'wp_no_robots' ); +add_filter( 'wp_robots', 'wp_robots_no_robots' ); require __DIR__ . '/wp-blog-header.php'; diff --git a/tests/phpunit/tests/customize/manager.php b/tests/phpunit/tests/customize/manager.php index 379f955d93..e6494ca707 100644 --- a/tests/phpunit/tests/customize/manager.php +++ b/tests/phpunit/tests/customize/manager.php @@ -893,7 +893,7 @@ class Tests_WP_Customize_Manager extends WP_UnitTestCase { $wp_customize->customize_preview_init(); $this->assertSame( $did_action_customize_preview_init + 1, did_action( 'customize_preview_init' ) ); - $this->assertSame( 10, has_action( 'wp_head', 'wp_no_robots' ) ); + $this->assertSame( 10, has_filter( 'wp_robots', 'wp_robots_no_robots' ) ); $this->assertSame( 10, has_action( 'wp_head', array( $wp_customize, 'remove_frameless_preview_messenger_channel' ) ) ); $this->assertSame( 10, has_filter( 'wp_headers', array( $wp_customize, 'filter_iframe_security_headers' ) ) ); $this->assertSame( 10, has_filter( 'wp_redirect', array( $wp_customize, 'add_state_query_params' ) ) ); diff --git a/tests/phpunit/tests/general/template.php b/tests/phpunit/tests/general/template.php index f434efdf73..8d00307080 100644 --- a/tests/phpunit/tests/general/template.php +++ b/tests/phpunit/tests/general/template.php @@ -474,21 +474,6 @@ class Tests_General_Template extends WP_UnitTestCase { $this->assertSame( $expected, $result ); } - /** - * @ticket 43590 - */ - function test_wp_no_robots() { - // Simulate private site (search engines discouraged). - update_option( 'blog_public', '0' ); - $actual_private = get_echo( 'wp_no_robots' ); - $this->assertSame( "\n", $actual_private ); - - // Simulate public site. - update_option( 'blog_public', '1' ); - $actual_public = get_echo( 'wp_no_robots' ); - $this->assertSame( "\n", $actual_public ); - } - /** * @ticket 40969 */ diff --git a/tests/phpunit/tests/robots.php b/tests/phpunit/tests/robots.php new file mode 100644 index 0000000000..dc8f3964b8 --- /dev/null +++ b/tests/phpunit/tests/robots.php @@ -0,0 +1,213 @@ +assertEmpty( $output ); + + // Render robots meta tag with noindex. + add_filter( 'wp_robots', array( $this, 'add_noindex_directive' ) ); + $output = get_echo( 'wp_robots' ); + $this->assertEquals( "\n", $output ); + + // Do not render robots meta tag when there are only false-y directives. + add_filter( 'wp_robots', array( $this, 'remove_noindex_directive' ), 11 ); + $output = get_echo( 'wp_robots' ); + $this->assertEmpty( $output ); + } + + /** + * @ticket 51511 + */ + public function test_wp_robots_parses_directives_correctly() { + add_filter( + 'wp_robots', + function( array $robots ) { + // Directives that should have values must use strings. + $robots['directive-with-value'] = 'yes'; + $robots['directive-with-numeric-value'] = '1'; + // Any non-string value will be evaluated as boolean. + // False-y directives will not be included. + $robots['directive-active-boolean'] = true; + $robots['directive-inactive-boolean'] = false; + $robots['directive-active-integer'] = 1; + $robots['directive-inactive-integer'] = 0; + return $robots; + } + ); + + $expected_directives_string = implode( + ', ', + array( + 'directive-with-value:yes', + 'directive-with-numeric-value:1', + 'directive-active-boolean', + 'directive-active-integer', + ) + ); + + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'{$expected_directives_string}'", $output ); + } + + /** + * @ticket 51511 + */ + public function test_wp_robots_includes_basic_sanitization_follow_nofollow() { + // Only follow or nofollow can be present, with follow taking precedence. + add_filter( 'wp_robots', array( $this, 'add_follow_directive' ) ); + add_filter( 'wp_robots', array( $this, 'add_nofollow_directive' ) ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'follow'", $output ); + + // Consider truthyness of the directive value though. + // Here nofollow is true, follow is false. + add_filter( 'wp_robots', array( $this, 'remove_follow_directive' ), 11 ); + add_filter( 'wp_robots', array( $this, 'add_nofollow_directive' ), 11 ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'nofollow'", $output ); + + // Consider truthyness of the directive value though. + // Here follow is true, nofollow is false. + add_filter( 'wp_robots', array( $this, 'add_follow_directive' ), 12 ); + add_filter( 'wp_robots', array( $this, 'remove_nofollow_directive' ), 12 ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'follow'", $output ); + } + + /** + * @ticket 51511 + */ + public function test_wp_robots_includes_basic_sanitization_archive_noarchive() { + // Only archive or noarchive can be present, with archive taking precedence. + add_filter( 'wp_robots', array( $this, 'add_archive_directive' ) ); + add_filter( 'wp_robots', array( $this, 'add_noarchive_directive' ) ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'archive'", $output ); + + // Consider truthyness of the directive value though. + // Here noarchive is true, archive is false. + add_filter( 'wp_robots', array( $this, 'remove_archive_directive' ), 11 ); + add_filter( 'wp_robots', array( $this, 'add_noarchive_directive' ), 11 ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'noarchive'", $output ); + + // Consider truthyness of the directive value though. + // Here archive is true, noarchive is false. + add_filter( 'wp_robots', array( $this, 'add_archive_directive' ), 12 ); + add_filter( 'wp_robots', array( $this, 'remove_noarchive_directive' ), 12 ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'archive'", $output ); + } + + /** + * @ticket 51511 + */ + public function test_wp_robots_noindex() { + add_filter( 'wp_robots', 'wp_robots_noindex' ); + + update_option( 'blog_public', '1' ); + $output = get_echo( 'wp_robots' ); + $this->assertEmpty( $output ); + + update_option( 'blog_public', '0' ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'noindex, nofollow'", $output ); + } + + /** + * @ticket 51511 + */ + public function test_wp_robots_no_robots() { + add_filter( 'wp_robots', 'wp_robots_no_robots' ); + + update_option( 'blog_public', '1' ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'noindex, follow'", $output ); + + update_option( 'blog_public', '0' ); + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'noindex, nofollow'", $output ); + } + + /** + * @ticket 51511 + */ + public function test_wp_robots_sensitive_page() { + add_filter( 'wp_robots', 'wp_robots_sensitive_page' ); + + $output = get_echo( 'wp_robots' ); + $this->assertContains( "'noindex, noarchive'", $output ); + } + + public function add_noindex_directive( array $robots ) { + $robots['noindex'] = true; + return $robots; + } + + public function remove_noindex_directive( array $robots ) { + $robots['noindex'] = false; + return $robots; + } + + public function add_follow_directive( array $robots ) { + $robots['follow'] = true; + return $robots; + } + + public function remove_follow_directive( array $robots ) { + $robots['follow'] = false; + return $robots; + } + + public function add_nofollow_directive( array $robots ) { + $robots['nofollow'] = true; + return $robots; + } + + public function remove_nofollow_directive( array $robots ) { + $robots['nofollow'] = false; + return $robots; + } + + public function add_archive_directive( array $robots ) { + $robots['archive'] = true; + return $robots; + } + + public function remove_archive_directive( array $robots ) { + $robots['archive'] = false; + return $robots; + } + + public function add_noarchive_directive( array $robots ) { + $robots['noarchive'] = true; + return $robots; + } + + public function remove_noarchive_directive( array $robots ) { + $robots['noarchive'] = false; + return $robots; + } +}