From 32963b0f1d03c5cb5eef77092747a7515cf22cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renan=20Gon=C3=A7alves?= Date: Tue, 24 Feb 2015 13:15:47 +0100 Subject: [PATCH 1/2] Adding a Polish domain to prove the library correctly encodes/decodes it. Refs #8. --- tests/PunycodeTest.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/PunycodeTest.php b/tests/PunycodeTest.php index 15c6890..9b94fab 100644 --- a/tests/PunycodeTest.php +++ b/tests/PunycodeTest.php @@ -125,6 +125,10 @@ public function domainNamesProvider() 'guangdong.广东', 'guangdong.xn--xhq521b', ), + array( + 'gwóźdź.pl', + 'xn--gwd-hna98db.pl', + ), ); } } From 92db63dce393550161fa4ae7cccf1d1994318153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renan=20Gon=C3=A7alves?= Date: Tue, 24 Feb 2015 13:20:37 +0100 Subject: [PATCH 2/2] Passing a character encoding to mb_* functions instead of defining it internally. Fix #8. --- README.md | 15 ++------------- src/Punycode.php | 25 +++++++++++++++++++++---- tests/PunycodeTest.php | 11 ----------- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 9ef3285..94e6eb6 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,6 @@ composer require true/punycode:~1.0 // Import Punycode use True\Punycode; -// Use UTF-8 as the encoding -mb_internal_encoding('utf-8'); - $Punycode = new Punycode(); var_dump($Punycode->encode('renangonçalves.com')); // outputs: xn--renangonalves-pgb.com @@ -38,18 +35,10 @@ var_dump($Punycode->decode('xn--renangonalves-pgb.com')); ### 1. What is this library for? -This library converts a UTF-8 encoded domain name to a IDNA ASCII form and vice-versa. - - -### 2. Do I need to use UTF-8? - -Yes, domain names should be UTF-8 encoded. - -Unless your application is not focused on international users, you should have been using a Unicode charset already. -Take your time to read [The Absolute Minimum Every Software Developer Must Know About Unicode](http://www.joelonsoftware.com/articles/Unicode.html). 
+This library converts a Unicode encoded domain name to an IDNA ASCII form and vice-versa. -### 3. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)? +### 2. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)? If you can compile the needed dependencies (intl, libidn) there is not much difference. But if you want to write portable code between hosts (including Windows and Mac OS), or can't install PECL extensions, this is the right library for you. diff --git a/src/Punycode.php b/src/Punycode.php index d7fbd66..7054312 100644 --- a/src/Punycode.php +++ b/src/Punycode.php @@ -48,6 +48,23 @@ class Punycode '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35 ); + /** + * Character encoding + * + * @var string + */ + protected $encoding; + + /** + * Constructor + * + * @param string $encoding Character encoding + */ + public function __construct($encoding = 'UTF-8') + { + $this->encoding = $encoding; + } + /** * Encode a domain to its Punycode version * @@ -94,7 +111,7 @@ protected function _encodePart($input) sort($codePoints['nonBasic']); $i = 0; - $length = mb_strlen($input); + $length = mb_strlen($input, $this->encoding); while ($h < $length) { $m = $codePoints['nonBasic'][$i++]; $delta = $delta + ($m - $n) * ($h + 1); @@ -194,7 +211,7 @@ protected function _decodePart($input) $bias = $this->_adapt($i - $oldi, ++$outputLength, ($oldi === 0)); $n = $n + (int) ($i / $outputLength); $i = $i % ($outputLength); - $output = mb_substr($output, 0, $i) . $this->_codePointToChar($n) . mb_substr($output, $i, $outputLength - 1); + $output = mb_substr($output, 0, $i, $this->encoding) . $this->_codePointToChar($n) . 
mb_substr($output, $i, $outputLength - 1, $this->encoding); $i++; } @@ -260,9 +277,9 @@ protected function _codePoints($input) 'nonBasic' => array(), ); - $length = mb_strlen($input); + $length = mb_strlen($input, $this->encoding); for ($i = 0; $i < $length; $i++) { - $char = mb_substr($input, $i, 1); + $char = mb_substr($input, $i, 1, $this->encoding); $code = $this->_charToCodePoint($char); if ($code < 128) { $codePoints['all'][] = $codePoints['basic'][] = $code; diff --git a/tests/PunycodeTest.php b/tests/PunycodeTest.php index 9b94fab..f631b87 100644 --- a/tests/PunycodeTest.php +++ b/tests/PunycodeTest.php @@ -4,17 +4,6 @@ class PunycodeTest extends \PHPUnit_Framework_TestCase { - /** - * Make sure the right internal encoding is defined when testing - * - */ - public function setUp() - { - parent::setUp(); - - mb_internal_encoding('utf-8'); - } - /** * Test encoding Punycode *