From ed8b9a8d2707eb9c704635cf98129ab1a77e0802 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Fri, 19 Jun 2015 20:05:52 +0000 Subject: [PATCH] `wptexturize()` improvements: * Make sure that strings ending with a number and quotation mark get the proper smart quotes * Introduce `wptexturize_primes()`, a logic tree to determine whether or not "7'." represents seven feet, then converts the special char into either a prime char or a closing quote char. Adds unit tests. Props miqrogroove. Fixes #29256. git-svn-id: https://develop.svn.wordpress.org/trunk@32863 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/formatting.php | 121 ++++++++++---- .../phpunit/tests/formatting/WPTexturize.php | 154 +++++++++++++++++- 2 files changed, 243 insertions(+), 32 deletions(-) diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index d224ab7ccc..f255942b29 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -46,7 +46,17 @@ function wptexturize( $text, $reset = false ) { $dynamic_replacements = null, $default_no_texturize_tags = null, $default_no_texturize_shortcodes = null, - $run_texturize = true; + $run_texturize = true, + $apos = null, + $prime = null, + $double_prime = null, + $opening_quote = null, + $closing_quote = null, + $opening_single_quote = null, + $closing_single_quote = null, + $open_q_flag = '', + $open_sq_flag = '', + $apos_flag = ''; // If there's nothing to do, just stop. if ( empty( $text ) || false === $run_texturize ) { @@ -129,40 +139,30 @@ function wptexturize( $text, $reset = false ) { // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. if ( "'" !== $apos || "'" !== $closing_single_quote ) { - $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote; + $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote; } if ( "'" !== $apos || '"' !== $closing_quote ) { - $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote; + $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote; } // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. if ( "'" !== $apos ) { - $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos; + $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag; } // Quoted Numbers like '0.42' if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { - $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote; + $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote; } // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. if ( "'" !== $opening_single_quote ) { - $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $opening_single_quote; + $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $open_sq_flag; } // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. if ( "'" !== $apos ) { - $dynamic[ '/(?'; + $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/"; + $prime_pattern = "/(?<=\\d)$needle/"; + $flag_after_digit = "/(?<=\\d)$flag/"; + $flag_no_digit = "/(? &$sentence ) { + if ( false === strpos( $sentence, $needle ) ) { + continue; + } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) { + $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count ); + if ( $count > 1 ) { + // This sentence appears to have multiple closing quotes. Attempt Vulcan logic. + $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 ); + if ( 0 === $count2 ) { + // Try looking for a quote followed by a period. + $count2 = substr_count( $sentence, "$flag." ); + if ( $count2 > 0 ) { + // Assume the rightmost quote-period match is the end of quotation. + $pos = strrpos( $sentence, "$flag." ); + } else { + // When all else fails, make the rightmost candidate a closing quote. + // This is most likely to be problematic in the context of bug #18549. + $pos = strrpos( $sentence, $flag ); + } + $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) ); + } + // Use conventional replacement on any remaining primes and quotes. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); + $sentence = str_replace( $flag, $close_quote, $sentence ); + } elseif ( 1 == $count ) { + // Found only one closing quote candidate, so give it priority over primes. + $sentence = str_replace( $flag, $close_quote, $sentence ); + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } else { + // No closing quotes found. Just run primes pattern. + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + } + } else { + $sentence = preg_replace( $prime_pattern, $prime, $sentence ); + $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); + } + if ( '"' == $needle && false !== strpos( $sentence, '"' ) ) { + $sentence = str_replace( '"', $close_quote, $sentence ); + } + } + + return implode( $open_quote, $sentences ); +} + /** * Search for disabled element tags. Push element to stack on tag open and pop * on tag close. diff --git a/tests/phpunit/tests/formatting/WPTexturize.php b/tests/phpunit/tests/formatting/WPTexturize.php index 326a91e52c..70627eb9fd 100644 --- a/tests/phpunit/tests/formatting/WPTexturize.php +++ b/tests/phpunit/tests/formatting/WPTexturize.php @@ -90,8 +90,8 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase { //$this->assertEquals('Here is “a test with a link”… and ellipses.', wptexturize('Here is "a test with a link"... and ellipses.')); //$this->assertEquals('Here is “a test with a link”.', wptexturize('Here is "a test with a link".')); //$this->assertEquals('Here is “a test with a link”and a work stuck to the end.', wptexturize('Here is "a test with a link"and a work stuck to the end.')); - //$this->assertEquals('A test with a finishing number, “like 23”.', wptexturize('A test with a finishing number, "like 23".')); - //$this->assertEquals('A test with a number, “like 62”, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.')); + $this->assertEquals('A test with a finishing number, “like 23”.', wptexturize('A test with a finishing number, "like 23".')); + $this->assertEquals('A test with a number, “like 62”, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.')); } /** @@ -121,7 +121,7 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase { $this->assertEquals('‘Class of ’99’?', wptexturize("'Class of '99'?")); $this->assertEquals('‘Class of ’99’s’', wptexturize("'Class of '99's'")); $this->assertEquals('‘Class of ’99’s’', wptexturize("'Class of '99’s'")); - //$this->assertEquals('“Class of 99”', wptexturize("\"Class of 99\"")); + $this->assertEquals('“Class of 99”', wptexturize("\"Class of 99\"")); $this->assertEquals('“Class of ’99”', wptexturize("\"Class of '99\"")); $this->assertEquals('{“Class of ’99”}', wptexturize("{\"Class of '99\"}")); $this->assertEquals(' “Class of ’99” ', wptexturize(" \"Class of '99\" ")); @@ -1900,4 +1900,152 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase { ), ); } + + /** + * Ensure primes logic is not too greedy at the end of a quotation. + * + * @ticket 29256 + * @dataProvider data_primes_vs_quotes + */ + function test_primes_vs_quotes( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_primes_vs_quotes() { + return array( + array( + "George's porch is 99' long.", + "George’s porch is 99′ long.", + ), + array( + 'The best year "was that time in 2012" when everyone partied, he said.', + 'The best year “was that time in 2012” when everyone partied, he said.', + ), + array( + "I need 4 x 20' = 80' of trim.", // Works only with a space before the = char. + "I need 4 x 20′ = 80′ of trim.", + ), + array( + '"Lorem ipsum dolor sit amet 1234"', + '“Lorem ipsum dolor sit amet 1234”', + ), + array( + "'Etiam eu egestas dui 1234'", + "‘Etiam eu egestas dui 1234’", + ), + array( + 'according to our source, "33% of all students scored less than 50" on the test.', + 'according to our source, “33% of all students scored less than 50” on the test.', + ), + array( + "The doctor said, 'An average height is between 5' and 6' in study group 7'. He then produced a 6' chart of averages. A man of 7', incredibly, is very possible.", + "The doctor said, ‘An average height is between 5′ and 6′ in study group 7’. He then produced a 6′ chart of averages. A man of 7′, incredibly, is very possible.", + ), + array( + 'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking. + +As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio. + +That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut. + +String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.', + + 'Pirates have voted on “The Expendables 3” with their clicks — and it turns out the Sylvester Stallone-starrer hasn’t been astoundingly popular among digital thieves, relatively speaking. + +As of Sunday, 5.12 million people worldwide had pirated “Expendables 3” since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio. + +That likely contributed to the action movie’s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after “Captain America: The Winter Soldier” (7.31 million), “Divergent” (6.29 million) and “The Amazing Spider-Man 2” (5.88 million). Moreover, that’s despite “Expendables 3” becoming available more than three weeks prior to the film’s U.S. theatrical debut. + +String with a number followed by a single quote ‘Expendables 3’ vestibulum in arcu mi.', + ), + ); + } + + /** + * Make sure translation actually works. + * + * Also make sure opening and closing quotes are allowed to be identical. + * + * @ticket 29256 + * @dataProvider data_primes_quotes_translation + */ + function test_primes_quotes_translation( $input, $output ) { + add_filter( 'gettext_with_context', array( $this, 'filter_translate2' ), 10, 4 ); + + $result = wptexturize( $input, true ); + + remove_filter( 'gettext_with_context', array( $this, 'filter_translate2' ), 10, 4 ); + wptexturize( 'reset', true ); + + return $this->assertEquals( $output, $result ); + } + + function filter_translate2( $translations, $text, $context, $domain ) { + switch ($text) { + case '–' : return '!endash!'; + case '—' : return '!emdash!'; + case '‘' : return '!q1!'; + case '’' : + if ( 'apostrophe' == $context ) { + return '!apos!'; + } else { + return '!q1!'; + } + case '“' : return '!q2!'; + case '”' : return '!q2!'; + case '′' : return '!prime1!'; + case '″' : return '!prime2!'; + default : return $translations; + } + } + + function data_primes_quotes_translation() { + return array( + array( + "George's porch is 99' long.", + "George!apos!s porch is 99!prime1! long.", + ), + array( + 'The best year "was that time in 2012" when everyone partied, he said.', + 'The best year !q2!was that time in 2012!q2! when everyone partied, he said.', + ), + array( + "I need 4 x 20' = 80' of trim.", // Works only with a space before the = char. + "I need 4 x 20!prime1! = 80!prime1! of trim.", + ), + array( + '"Lorem ipsum dolor sit amet 1234"', + '!q2!Lorem ipsum dolor sit amet 1234!q2!', + ), + array( + "'Etiam eu egestas dui 1234'", + "!q1!Etiam eu egestas dui 1234!q1!", + ), + array( + 'according to our source, "33% of all students scored less than 50" on the test.', + 'according to our source, !q2!33% of all students scored less than 50!q2! on the test.', + ), + array( + "The doctor said, 'An average height is between 5' and 6' in study group 7'. He then produced a 6' chart of averages. A man of 7', incredibly, is very possible.", + "The doctor said, !q1!An average height is between 5!prime1! and 6!prime1! in study group 7!q1!. He then produced a 6!prime1! chart of averages. A man of 7!prime1!, incredibly, is very possible.", + ), + array( + 'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking. + +As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio. + +That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut. + +String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.', + + 'Pirates have voted on !q2!The Expendables 3!q2! with their clicks !emdash! and it turns out the Sylvester Stallone-starrer hasn!apos!t been astoundingly popular among digital thieves, relatively speaking. + +As of Sunday, 5.12 million people worldwide had pirated !q2!Expendables 3!q2! since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio. + +That likely contributed to the action movie!apos!s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after !q2!Captain America: The Winter Soldier!q2! (7.31 million), !q2!Divergent!q2! (6.29 million) and !q2!The Amazing Spider-Man 2!q2! (5.88 million). Moreover, that!apos!s despite !q2!Expendables 3!q2! becoming available more than three weeks prior to the film!apos!s U.S. theatrical debut. + +String with a number followed by a single quote !q1!Expendables 3!q1! vestibulum in arcu mi.', + ), + ); + } } \ No newline at end of file