Replace the ancient phpfreaks.com regex used to extract URLs to ping with a more robust matcher. URLs containing commas or sequences such as `&amp;` were not being pinged. The new matcher even works for most IDN URLs. Adds unit tests.

Fixes #9064.



git-svn-id: https://develop.svn.wordpress.org/trunk@25313 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor
2013-09-10 03:17:51 +00:00
parent e72ffa5d96
commit 5a75531f19
3 changed files with 187 additions and 23 deletions

View File

@@ -1827,17 +1827,9 @@ function pingback($content, $post_ID) {
$pung = get_pung($post_ID);
// Variables
$ltrs = '\w';
$gunk = '/#~:.?+=&%@!\-';
$punc = '.:?\-';
$any = $ltrs . $gunk . $punc;
// Step 1
// Parsing the post, external links (if any) are stored in the $post_links array
// This regexp comes straight from phpfreaks.com
// http://www.phpfreaks.com/quickcode/Extract_All_URLs_on_a_Page/15.php
preg_match_all("{\b http : [$any] +? (?= [$punc] * [^$any] | $)}x", $content, $post_links_temp);
$post_links_temp = wp_extract_urls( $content );
// Step 2.
// Walking thru the links array
@@ -1848,7 +1840,7 @@ function pingback($content, $post_ID) {
// http://dummy-weblog.org/post.php
// We don't wanna ping first and second types, even if they have a valid <link/>
foreach ( (array) $post_links_temp[0] as $link_test ) :
foreach ( (array) $post_links_temp as $link_test ) :
if ( !in_array($link_test, $pung) && (url_to_postid($link_test) != $post_ID) // If we haven't pung it already and it isn't a link to itself
&& !is_local_attachment($link_test) ) : // Also, let's never ping local attachments.
if ( $test = @parse_url($link_test) ) {