diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index ee3bcd0a8d..e1a0ee3104 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -73,27 +73,50 @@ function wptexturize($text) { $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); - $dynamic = array(); - if ( "'" != $apos ) { - $dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's - $dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99 - } - if ( "'" != $opening_single_quote ) - $dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [ - if ( '"' != $double_prime ) - $dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime) - if ( "'" != $prime ) - $dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime) - if ( "'" != $apos ) - $dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word - if ( '"' != $opening_quote ) - $dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [ - if ( '"' != $closing_quote ) - $dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . '$1'; // closing double quote - if ( "'" != $closing_single_quote ) - $dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote + /* + * Regex for common whitespace characters. + * + * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. + * This is designed to replace the PCRE \s sequence. In #WP22692, that sequence + * was found to be unreliable due to random inclusion of the A0 byte. + */ + $spaces = '[\r\n\t ]|\xC2\xA0| '; - $dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times) + + // Pattern-based replacements of characters. + $dynamic = array(); + + // '99 '99s '99's (apostrophe) + if ( "'" != $apos ) + $dynamic[ '/\'(?=\d)/' ] = $apos; + + // Single quote at start, or preceded by (, {, <, [, ", or spaces. + if ( "'" != $opening_single_quote ) + $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote; + + // 9" (double prime) + if ( '"' != $double_prime ) + $dynamic[ '/(?<=\d)"/' ] = $double_prime; + + // 9' (prime) + if ( "'" != $prime ) + $dynamic[ '/(?<=\d)\'/' ] = $prime; + + // Apostrophe in a word. No spaces or double primes. + if ( "'" != $apos ) + $dynamic[ '/(?assertEquals('[a]a–b[code]---[/code]a–b[/a]', wptexturize('[a]a--b[code]---[/code]a--b[/a]')); $this->assertEquals('
--
', wptexturize('
--
')); - $this->assertEquals('---', wptexturize('---')); + $this->assertEquals( '---', wptexturize( '---' ) ); + $this->assertEquals( '---', wptexturize( '---' ) ); + $this->assertEquals( '', wptexturize( '' ) ); + $this->assertEquals( '', wptexturize( '' ) ); + $this->assertEquals( '---', wptexturize( '---' ) ); $this->assertEquals('href="baba" “baba”', wptexturize('href="baba" "baba"')); @@ -44,7 +48,6 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase { //WP Ticket #4539 function test_basic_quotes() { $this->assertEquals('test’s', wptexturize('test\'s')); - $this->assertEquals('test’s', wptexturize('test\'s')); $this->assertEquals('‘quoted’', wptexturize('\'quoted\'')); $this->assertEquals('“quoted”', wptexturize('"quoted"')); @@ -194,4 +197,801 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase { $this->assertEquals( ' — ', wptexturize( ' -- ' ) ); $this->assertEquals( ' — ', wptexturize( ' -- ') ); } + + /** + * Test spaces around quotes. + * + * These should never happen, even if the desired output changes some day. + * + * @ticket 22692 + */ + function test_spaces_around_quotes_never() { + $nbsp = "\xC2\xA0"; + + $problem_input = "$nbsp\"A"; + $problem_output = "$nbsp”A"; + + $this->assertNotEquals( $problem_output, wptexturize( $problem_input ) ); + } + + /** + * Test spaces around quotes. + * + * These are desirable outputs for the current design. + * + * @ticket 22692 + * @dataProvider data_spaces_around_quotes + */ + function test_spaces_around_quotes( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_spaces_around_quotes() { + $nbsp = "\xC2\xA0"; + $pi = "\xCE\xA0"; + + return array( + array( + "stop. $nbsp\"A quote after 2 spaces.\"", + "stop. $nbsp“A quote after 2 spaces.”", + ), + array( + "stop.$nbsp$nbsp\"A quote after 2 spaces.\"", + "stop.$nbsp$nbsp“A quote after 2 spaces.”", + ), + array( + "stop. $nbsp'A quote after 2 spaces.'", + "stop. $nbsp‘A quote after 2 spaces.’", + ), + array( + "stop.$nbsp$nbsp'A quote after 2 spaces.'", + "stop.$nbsp$nbsp‘A quote after 2 spaces.’", + ), + array( + "stop.  \"A quote after 2 spaces.\"", + "stop.  “A quote after 2 spaces.”", + ), + array( + "stop.  \"A quote after 2 spaces.\"", + "stop.  “A quote after 2 spaces.”", + ), + array( + "stop.  'A quote after 2 spaces.'", + "stop.  ‘A quote after 2 spaces.’", + ), + array( + "stop.  'A quote after 2 spaces.'", + "stop.  ‘A quote after 2 spaces.’", + ), + array( + "Contraction: $pi's", + "Contraction: $pi’s", + ), + ); + } + + /** + * Apostrophe before a number always becomes ’ (apos); + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_apos_before_digits + */ + function test_apos_before_digits( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_apos_before_digits() { + return array( + array( + "word '99 word", + "word ’99 word", + ), + array( + "word'99 word", + "word’99 word", + ), + array( + "word '99word", + "word ’99word", + ), + array( + "word'99word", + "word’99word", + ), + array( + "word '99’s word", // Appears as a separate but logically superfluous pattern in 3.8. + "word ’99’s word", + ), + array( + "word '99's word", // Due to the logic error, second apos becomes a prime. See ticket #22823 + "word ’99′s word", + ), + array( + "word '99'samsonite", + "word ’99′samsonite", + ), + array( + "according to our source, '33% of all students scored less than 50' on the test.", // Apostrophes and primes have priority over quotes + "according to our source, ’33% of all students scored less than 50′ on the test.", + ), + array( + "word '99' word", // See ticket #8775 + "word ’99′ word", + ), + ); + } + + /** + * Apostrophe after a space or ([{<" becomes ‘ (opening_single_quote) + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_opening_single_quote + */ + function test_opening_single_quote( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_opening_single_quote() { + return array( + array( + "word 'word word", + "word ‘word word", + ), + array( + "word ('word word", + "word (‘word word", + ), + array( + "word ['word word", + "word [‘word word", + ), + array( + "word <'word word", // Invalid HTML input? + "word <‘word word", + ), + array( + "word <'word word", // Valid HTML input triggers the apos in a word pattern + "word <’word word", + ), + array( + "word {'word word", + "word {‘word word", + ), + array( + "word \"'word word", + "word “‘word word", // Two opening quotes + ), + array( + "'word word", + "‘word word", + ), + array( + "word('word word", + "word(‘word word", + ), + array( + "word['word word", + "word[‘word word", + ), + array( + "word<'word word", + "word<‘word word", + ), + array( + "word<'word word", + "word<’word word", + ), + array( + "word{'word word", + "word{‘word word", + ), + array( + "word\"'word word", + "word”‘word word", // Closing quote, then opening quote + ), + array( + "word ' word word", + "word ‘ word word", + ), + array( + "word (' word word", + "word (‘ word word", + ), + array( + "word [' word word", + "word [‘ word word", + ), + array( + "word <' word word", // Invalid HTML input? + "word <‘ word word", + ), + array( + "word <' word word", // Valid HTML input triggers the closing single quote here + "word <’ word word", + ), + array( + "word {' word word", + "word {‘ word word", + ), + array( + "word \"' word word", + "word “‘ word word", // Two opening quotes + ), + array( + "' word word", + "‘ word word", + ), + array( + "word(' word word", + "word(‘ word word", + ), + array( + "word[' word word", + "word[‘ word word", + ), + array( + "word<' word word", + "word<‘ word word", + ), + array( + "word<' word word", + "word<’ word word", + ), + array( + "word{' word word", + "word{‘ word word", + ), + array( + "word\"' word word", + "word”‘ word word", // Closing quote, then opening quote + ), + ); + } + + /** + * Double quote after a number becomes ″ (double_prime) + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_double_prime + */ + function test_double_prime( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_double_prime() { + return array( + array( + 'word 99" word', + 'word 99″ word', + ), + array( + 'word 99"word', + 'word 99″word', + ), + array( + 'word99" word', + 'word99″ word', + ), + array( + 'word99"word', + 'word99″word', + ), + ); + } + + /** + * Apostrophe after a number becomes ′ (prime) + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_single_prime + */ + function test_single_prime( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_single_prime() { + return array( + array( + "word 99' word", + "word 99′ word", + ), + array( + "word 99'word", + "word 99′word", + ), + array( + "word99' word", + "word99′ word", + ), + array( + "word99'word", + "word99′word", + ), + ); + } + + /** + * Apostrophe "in a word" becomes ’ (apos) + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_contractions + */ + function test_contractions( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_contractions() { + return array( + array( + "word word's word", + "word word’s word", + ), + array( + "word word'. word", // Quotes with outside punctuation could end with apostrophes instead of closing quotes (may affect i18n) + "word word’. word", + ), + array( + "word ]'. word", + "word ]’. word", + ), + array( + "word )'. word", + "word )’. word", + ), + array( + "word }'. word", + "word }’. word", + ), + array( + "word >'. word", // Not tested + "word >’. word", + ), + array( + "word >'. word", + "word >’. word", + ), + ); + } + + /** + * Double quote after a space or ([{< becomes “ (opening_quote) if not followed by spaces + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_opening_quote + */ + function test_opening_quote( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_opening_quote() { + return array( + array( + 'word "word word', + 'word “word word', + ), + array( + 'word ("word word', + 'word (“word word', + ), + array( + 'word ["word word', + 'word [“word word', + ), + array( + 'word <"word word', // Invalid HTML input? + 'word <“word word', + ), + array( + 'word <"word word', // Valid HTML input triggers the closing quote pattern + 'word <”word word', + ), + array( + 'word {"word word', + 'word {“word word', + ), + array( + '"word word', + '“word word', + ), + array( + 'word("word word', + 'word(“word word', + ), + array( + 'word["word word', + 'word[“word word', + ), + array( + 'word<"word word', // Invalid HTML input? + 'word<“word word', + ), + array( + 'word<"word word', // Valid HTML input triggers the closing quote pattern + 'word<”word word', + ), + array( + 'word{"word word', + 'word{“word word', + ), + array( + 'word "99 word', + 'word “99 word', + ), + ); + } + + /** + * Double quote becomes ” (closing_quote) unless it is already converted to double_prime or opening_quote. + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_closing_quote + */ + function test_closing_quote( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_closing_quote() { + return array( + array( + 'word word" word', + 'word word” word', + ), + array( + 'word word") word', + 'word word”) word', + ), + array( + 'word word"] word', + 'word word”] word', + ), + array( + 'word word"} word', + 'word word”} word', + ), + array( + 'word word"> word', // Invalid HTML input? + 'word word”> word', + ), + array( + 'word word"> word', // Valid HTML should work + 'word word”> word', + ), + array( + 'word word"', + 'word word”', + ), + array( + 'word word"word', + 'word word”word', + ), + array( + 'word"word"word', + 'word”word”word', + ), + array( + 'test sentence".', + 'test sentence”.', + ), + array( + 'test sentence."', + 'test sentence.”', + ), + array( + 'test sentence". word', + 'test sentence”. word', + ), + array( + 'test sentence." word', + 'test sentence.” word', + ), + ); + } + + /** + * Test that single quotes followed by a space or a period become ’ (closing_single_quote) + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_closing_single_quote + */ + function test_closing_single_quote( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_closing_single_quote() { + return array( + array( + "word word' word", + "word word’ word", + ), + array( + "word word'. word", + "word word’. word", + ), + array( + "word word'.word", + "word word’.word", + ), + array( + "word word'", + "word word’", + ), + array( + "test sentence'.", + "test sentence’.", + ), + array( + "test sentence.'", + "test sentence.’", + ), + array( + "test sentence'. word", + "test sentence’. word", + ), + array( + "test sentence.' word", + "test sentence.’ word", + ), + ); + } + + /** + * Tests multiplication. + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_multiplication + */ + function test_multiplication( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_multiplication() { + return array( + array( + "9x9", + "9×9", + ), + array( + "12x34", + "12×34", + ), + array( + "9 x 9", + "9 x 9", + ), + ); + } + + /** + * Test ampersands. & always becomes & unless it is followed by # or ; + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_ampersand + */ + function test_ampersand( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_ampersand() { + return array( + array( + "word & word", + "word & word", + ), + array( + "word&word", + "word&word", + ), + array( + "word   word", + "word   word", + ), + array( + "word & word", + "word & word", + ), + array( + "word &# word", + "word &# word", // invalid output? + ), + array( + "word &44; word", + "word &44; word", + ), + array( + "word && word", + "word && word", + ), + array( + "word &!amp; word", + "word &!amp; word", + ), + ); + } + + /** + * Test "cockney" phrases, which begin with an apostrophe instead of an opening single quote. + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_cockney + */ + function test_cockney( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_cockney() { + return array( + array( + "word 'tain't word", + "word ’tain’t word", + ), + array( + "word 'twere word", + "word ’twere word", + ), + array( + "word 'twas word", + "word ’twas word", + ), + array( + "word 'tis word", + "word ’tis word", + ), + array( + "word 'twill word", + "word ’twill word", + ), + array( + "word 'til word", + "word ’til word", + ), + array( + "word 'bout word", + "word ’bout word", + ), + array( + "word 'nuff word", + "word ’nuff word", + ), + array( + "word 'round word", + "word ’round word", + ), + array( + "word 'cause word", + "word ’cause word", + ), + array( + "word 'em word", + "word ‘em word", + ), + ); + } + + /** + * Test smart dashes. + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_smart_dashes + */ + function test_smart_dashes( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_smart_dashes() { + return array( + array( + "word --- word", + "word — word", + ), + array( + "word---word", + "word—word", + ), + array( + "word -- word", + "word — word", + ), + array( + "word--word", + "word–word", + ), + array( + "word - word", + "word – word", + ), + array( + "word-word", + "word-word", + ), + array( + "word xn– word", + "word xn-- word", + ), + array( + "wordxn–word", + "wordxn--word", + ), + ); + } + + /** + * Test miscellaneous static replacements. + * + * Checks all baseline patterns. If anything ever changes in wptexturize(), these tests may fail. + * + * @ticket 22692 + * @dataProvider data_misc_static_replacements + */ + function test_misc_static_replacements( $input, $output ) { + return $this->assertEquals( $output, wptexturize( $input ) ); + } + + function data_misc_static_replacements() { + return array( + array( + "word ... word", + "word … word", + ), + array( + "word...word", + "word…word", + ), + array( + "word `` word", + "word “ word", + ), + array( + "word``word", + "word“word", + ), + array( + "word '' word", + "word ” word", + ), + array( + "word''word", + "word”word", + ), + array( + "word (tm) word", + "word ™ word", + ), + array( + "word (tm)word", + "word ™word", + ), + array( + "word(tm) word", + "word(tm) word", + ), + array( + "word(tm)word", + "word(tm)word", + ), + ); + } }