diff --git a/src/wp-admin/includes/class-wp-press-this.php b/src/wp-admin/includes/class-wp-press-this.php
index 3ea927d3d2..0865f6ac70 100644
--- a/src/wp-admin/includes/class-wp-press-this.php
+++ b/src/wp-admin/includes/class-wp-press-this.php
@@ -40,7 +40,7 @@ class WP_Press_This {
return array(
// Used to trigger the bookmarklet update notice.
// Needs to be set here and in get_shortcut_link() in wp-includes/link-template.php.
- 'version' => '5',
+ 'version' => '6',
/**
* Filter whether or not Press This should redirect the user in the parent window upon save.
@@ -278,7 +278,7 @@ class WP_Press_This {
*/
public function fetch_source_html( $url ) {
// Download source page to tmp file.
- $source_tmp_file = ( ! empty( $url ) ) ? download_url( $url ) : '';
+ $source_tmp_file = ( ! empty( $url ) ) ? download_url( $url, 30 ) : '';
$source_content = '';
if ( ! is_wp_error( $source_tmp_file ) && file_exists( $source_tmp_file ) ) {
@@ -318,6 +318,162 @@ class WP_Press_This {
return $source_content;
}
+ /**
+  * Caps an array at 50 entries.
+  *
+  * @param mixed $value Candidate array.
+  * @return array The input itself when it is an array of 50 items or fewer, its first
+  *               50 items when it is longer, or an empty array for any non-array input.
+  */
+ private function _limit_array( $value ) {
+ 	// Anything that is not an array is discarded outright.
+ 	if ( ! is_array( $value ) ) {
+ 		return array();
+ 	}
+
+ 	// Short arrays are handed back untouched (keys included); only long ones are sliced.
+ 	return ( count( $value ) > 50 ) ? array_slice( $value, 0, 50 ) : $value;
+ }
+
+ /**
+  * Normalises a scalar into a length-limited, sanitised string.
+  *
+  * Numeric values and booleans are only cast to string. Strings are cut to at most
+  * 5000 characters, entity-decoded, trimmed and passed through sanitize_text_field().
+  * Every other type yields an empty string.
+  *
+  * @param mixed $value Value to normalise.
+  * @return string The resulting string, possibly empty.
+  */
+ private function _limit_string( $value ) {
+ 	// Numbers (including numeric strings) and booleans skip the sanitising steps.
+ 	if ( is_bool( $value ) || is_numeric( $value ) ) {
+ 		return (string) $value;
+ 	}
+
+ 	if ( ! is_string( $value ) ) {
+ 		return '';
+ 	}
+
+ 	// Truncate first, then decode entities, so the cap applies to the raw input.
+ 	$text = ( mb_strlen( $value ) > 5000 ) ? mb_substr( $value, 0, 5000 ) : $value;
+ 	$text = html_entity_decode( $text, ENT_QUOTES, 'UTF-8' );
+
+ 	return sanitize_text_field( trim( $text ) );
+ }
+
+ /**
+  * Validates a URL candidate, returning it cleaned up or an empty string.
+  *
+  * The value is first run through _limit_string(). It is then rejected when it is
+  * longer than 2048 characters, does not start with http://, https:// or //, or
+  * contains a double quote or a space.
+  *
+  * @param mixed $url URL candidate; non-strings are rejected.
+  * @return string The cleaned URL, or '' when it is rejected.
+  */
+ private function _limit_url( $url ) {
+ 	if ( ! is_string( $url ) ) {
+ 		return '';
+ 	}
+
+ 	$clean = $this->_limit_string( $url );
+
+ 	// HTTP 1.1 allows 8000 chars but the "de-facto" standard supported in all current
+ 	// browsers is 2048. An over-long URL is dropped rather than truncated, since a
+ 	// truncated URL would be invalid. Only http(s) or protocol-relative URLs are
+ 	// allowed, and quotes or spaces are never accepted.
+ 	if ( mb_strlen( $clean ) > 2048
+ 		|| ! preg_match( '%^(https?:)?//%i', $clean )
+ 		|| false !== strpos( $clean, '"' )
+ 		|| false !== strpos( $clean, ' ' ) ) {
+ 		return '';
+ 	}
+
+ 	return $clean;
+ }
+
+ /**
+  * Validates an image URL and filters out images that are unlikely to be content.
+  *
+  * The URL is validated with _limit_url() and then compared against a list of
+  * patterns for ads, share buttons, spinners/spacers, thumbnails, core interface
+  * images, tiny fixed-size images and tracking pixels.
+  *
+  * @param mixed $src Image URL candidate.
+  * @return string The validated URL, or '' when it is invalid or filtered out.
+  */
+ private function _limit_img( $src ) {
+ 	$src = $this->_limit_url( $src );
+
+ 	// An already rejected URL needs no further checks.
+ 	if ( '' === $src ) {
+ 		return '';
+ 	}
+
+ 	$blocked_patterns = array(
+ 		// Ads: /ad/, /ads/ or /adx/ directories. The previous `[sx]{1}?` was a lazy
+ 		// "exactly one" quantifier, not an optional one, so plain /ad/ slipped through.
+ 		'/\/ad[sx]?\//',
+ 		// Share-this type button
+ 		'/(\/share-?this[^\.]+?\.[a-z0-9]{3,4})(\?.*)?$/',
+ 		// Loaders, spinners, spacers
+ 		'/\/(spinner|loading|spacer|blank|rss)\.(gif|jpg|png)/',
+ 		// Fancy loaders, spinners, spacers
+ 		'/\/([^\.\/]+[-_]{1})?(spinner|loading|spacer|blank)s?([-_]{1}[^\.\/]+)?\.[a-z0-9]{3,4}/',
+ 		// Thumbnails, too small, usually irrelevant to context
+ 		'/([^\.\/]+[-_]{1})?thumb[^.]*\.(gif|jpg|png)$/',
+ 		// Classic WP interface images
+ 		'/\/wp-includes\//',
+ 		// Most often tiny buttons/thumbs (< 100px wide)
+ 		'/[^\d]{1}\d{1,2}x\d+\.(gif|jpg|png)$/',
+ 		// See mathtag.com and https://www.quantcast.com/how-we-do-it/iab-standard-measurement/how-we-collect-data/
+ 		'/\/pixel\.(mathtag|quantserve)\.com/',
+ 	);
+
+ 	foreach ( $blocked_patterns as $pattern ) {
+ 		if ( preg_match( $pattern, $src ) ) {
+ 			return '';
+ 		}
+ 	}
+
+ 	// Classic WP stats gif
+ 	if ( false !== strpos( $src, '/g.gif' ) ) {
+ 		return '';
+ 	}
+
+ 	return $src;
+ }
+
+ /**
+  * Validates an embed URL and reduces it to a known, canonical form.
+  *
+  * The URL is validated with _limit_url(). YouTube embed/v URLs, Vimeo player and
+  * moogaloop URLs and Vine embed URLs are rewritten to their regular page URL.
+  * Other URLs are kept only when they match one of the accepted page patterns
+  * (YouTube, youtu.be, Vimeo, Dailymotion, SoundCloud, Twitter status, Vine).
+  *
+  * @param mixed $src Embed URL candidate.
+  * @return string The resulting URL, or '' when it is invalid or not accepted.
+  */
+ private function _limit_embed( $src ) {
+ 	$src = $this->_limit_url( $src );
+
+ 	// Player/embed URLs are rewritten to the matching page URL.
+ 	if ( preg_match( '/\/\/www\.youtube\.com\/(embed|v)\/([^\?]+)\?.+$/', $src, $parts ) ) {
+ 		return 'https://www.youtube.com/watch?v=' . $parts[2];
+ 	}
+
+ 	if ( preg_match( '/\/\/player\.vimeo\.com\/video\/([\d]+)([\?\/]{1}.*)?$/', $src, $parts )
+ 		|| preg_match( '/\/\/vimeo\.com\/moogaloop\.swf\?clip_id=([\d]+)$/', $src, $parts ) ) {
+ 		return 'https://vimeo.com/' . (int) $parts[1];
+ 	}
+
+ 	if ( preg_match( '/\/\/vine\.co\/v\/([^\/]+)\/embed/', $src, $parts ) ) {
+ 		return 'https://vine.co/v/' . $parts[1];
+ 	}
+
+ 	// Anything else must already look like one of the accepted page URLs.
+ 	$accepted_patterns = array(
+ 		'/\/\/(m\.|www\.)?youtube\.com\/watch\?/',
+ 		'/\/youtu\.be\/.+$/',
+ 		'/\/\/vimeo\.com\/[\d]+$/',
+ 		'/\/\/(www\.)?dailymotion\.com\/video\/.+$/',
+ 		'/\/\/soundcloud\.com\/.+$/',
+ 		'/\/\/twitter\.com\/[^\/]+\/status\/[\d]+$/',
+ 		'/\/\/vine\.co\/v\/[^\/]+/',
+ 	);
+
+ 	foreach ( $accepted_patterns as $pattern ) {
+ 		if ( preg_match( $pattern, $src ) ) {
+ 			return $src;
+ 		}
+ 	}
+
+ 	return '';
+ }
+
+ /**
+  * Files a single meta name/value pair into the collected data.
+  *
+  * Names ending in title, description or keywords are stored under '_meta'.
+  * Known video/URL names go through _limit_embed() into '_embed', and known
+  * image names go through _limit_img() into '_img'. Other names are ignored.
+  *
+  * @param string $meta_name  Meta property or name.
+  * @param string $meta_value Meta content.
+  * @param array  $data       Data collected so far.
+  * @return array The data array, updated where applicable.
+  */
+ private function _process_meta_entry( $meta_name, $meta_value, $data ) {
+ 	if ( preg_match( '/:?(title|description|keywords)$/', $meta_name ) ) {
+ 		$data['_meta'][ $meta_name ] = $meta_value;
+ 		return $data;
+ 	}
+
+ 	$embed_names = array( 'og:url', 'og:video', 'og:video:secure_url' );
+ 	$image_names = array(
+ 		'og:image',
+ 		'og:image:secure_url',
+ 		'twitter:image0:src',
+ 		'twitter:image0',
+ 		'twitter:image:src',
+ 		'twitter:image',
+ 	);
+
+ 	// Loose in_array() on purpose: it compares with ==, the same way a switch does.
+ 	if ( in_array( $meta_name, $embed_names ) ) {
+ 		$bucket = '_embed';
+ 		$clean  = $this->_limit_embed( $meta_value );
+ 	} elseif ( in_array( $meta_name, $image_names ) ) {
+ 		$bucket = '_img';
+ 		$clean  = $this->_limit_img( $meta_value );
+ 	} else {
+ 		// Not a name this method handles.
+ 		return $data;
+ 	}
+
+ 	// The bucket is created even when the value ends up being rejected.
+ 	if ( ! isset( $data[ $bucket ] ) ) {
+ 		$data[ $bucket ] = array();
+ 	}
+
+ 	// Store non-empty values once only.
+ 	if ( ! empty( $clean ) && ! in_array( $clean, $data[ $bucket ] ) ) {
+ 		$data[ $bucket ][] = $clean;
+ 	}
+
+ 	return $data;
+ }
+
/**
* Fetches and parses _meta, _img, and _links data from the source.
*
@@ -339,18 +495,42 @@ class WP_Press_This {
return array( 'errors' => $source_content->get_error_messages() );
}
+ // Fetch and gather data first, so discovered media is offered 1st to user.
+ if ( empty( $data['_meta'] ) ) {
+ $data['_meta'] = array();
+ }
+
+ if ( preg_match_all( '/]+>/', $source_content, $matches ) ) {
+ $items = $this->_limit_array( $matches[0] );
+
+ foreach ( $items as $value ) {
+ if ( preg_match( '/(property|name)="([^"]+)"[^>]+content="([^"]+)"/', $value, $new_matches ) ) {
+ $meta_name = $this->_limit_string( $new_matches[2] );
+ $meta_value = $this->_limit_string( $new_matches[3] );
+
+ // Sanity check. $meta_name is usually things like 'title', 'description', 'keywords', etc.
+ if ( strlen( $meta_name ) > 100 ) {
+ continue;
+ }
+
+ $data = $this->_process_meta_entry( $meta_name, $meta_value, $data );
+ }
+ }
+ }
+
// Fetch and gather <img> data.
if ( empty( $data['_img'] ) ) {
$data['_img'] = array();
}
- if ( preg_match_all( '/
/', $source_content, $matches ) ) {
- if ( ! empty( $matches[0] ) ) {
- foreach ( $matches[0] as $value ) {
- if ( preg_match( '/
]+src="([^"]+)"[^>]+\/>/', $value, $new_matches ) ) {
- if ( ! in_array( $new_matches[1], $data['_img'] ) ) {
- $data['_img'][] = $new_matches[1];
- }
+ if ( preg_match_all( '/
]+>/', $source_content, $matches ) ) {
+ $items = $this->_limit_array( $matches[0] );
+
+ foreach ( $items as $value ) {
+ if ( preg_match( '/src=(\'|")([^\'"]+)\\1/', $value, $new_matches ) ) {
+ $src = $this->_limit_img( $new_matches[2] );
+ if ( ! empty( $src ) && ! in_array( $src, $data['_img'] ) ) {
+ $data['_img'][] = $src;
}
}
}
@@ -361,66 +541,15 @@ class WP_Press_This {
$data['_embed'] = array();
}
- if ( preg_match_all( '/