diff --git a/README.md b/README.md index 9ef3285..94e6eb6 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,6 @@ composer require true/punycode:~1.0 // Import Punycode use True\Punycode; -// Use UTF-8 as the encoding -mb_internal_encoding('utf-8'); - $Punycode = new Punycode(); var_dump($Punycode->encode('renangonçalves.com')); // outputs: xn--renangonalves-pgb.com @@ -38,18 +35,10 @@ var_dump($Punycode->decode('xn--renangonalves-pgb.com')); ### 1. What is this library for? -This library converts a UTF-8 encoded domain name to a IDNA ASCII form and vice-versa. - - -### 2. Do I need to use UTF-8? - -Yes, domain names should be UTF-8 encoded. - -Unless your application is not focused on international users, you should have been using a Unicode charset already. -Take your time to read [The Absolute Minimum Every Software Developer Must Know About Unicode](http://www.joelonsoftware.com/articles/Unicode.html). +This library converts a Unicode encoded domain name to an IDNA ASCII form and vice-versa. -### 3. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)? +### 2. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)? If you can compile the needed dependencies (intl, libidn) there is not much difference. But if you want to write portable code between hosts (including Windows and Mac OS), or can't install PECL extensions, this is the right library for you. 
diff --git a/src/Punycode.php b/src/Punycode.php index d7fbd66..7054312 100644 --- a/src/Punycode.php +++ b/src/Punycode.php @@ -48,6 +48,23 @@ class Punycode '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35 ); + /** + * Character encoding + * + * @var string + */ + protected $encoding; + + /** + * Constructor + * + * @param string $encoding Character encoding + */ + public function __construct($encoding = 'UTF-8') + { + $this->encoding = $encoding; + } + /** * Encode a domain to its Punycode version * @@ -94,7 +111,7 @@ protected function _encodePart($input) sort($codePoints['nonBasic']); $i = 0; - $length = mb_strlen($input); + $length = mb_strlen($input, $this->encoding); while ($h < $length) { $m = $codePoints['nonBasic'][$i++]; $delta = $delta + ($m - $n) * ($h + 1); @@ -194,7 +211,7 @@ protected function _decodePart($input) $bias = $this->_adapt($i - $oldi, ++$outputLength, ($oldi === 0)); $n = $n + (int) ($i / $outputLength); $i = $i % ($outputLength); - $output = mb_substr($output, 0, $i) . $this->_codePointToChar($n) . mb_substr($output, $i, $outputLength - 1); + $output = mb_substr($output, 0, $i, $this->encoding) . $this->_codePointToChar($n) . 
mb_substr($output, $i, $outputLength - 1, $this->encoding); $i++; } @@ -260,9 +277,9 @@ protected function _codePoints($input) 'nonBasic' => array(), ); - $length = mb_strlen($input); + $length = mb_strlen($input, $this->encoding); for ($i = 0; $i < $length; $i++) { - $char = mb_substr($input, $i, 1); + $char = mb_substr($input, $i, 1, $this->encoding); $code = $this->_charToCodePoint($char); if ($code < 128) { $codePoints['all'][] = $codePoints['basic'][] = $code; diff --git a/tests/PunycodeTest.php b/tests/PunycodeTest.php index 15c6890..f631b87 100644 --- a/tests/PunycodeTest.php +++ b/tests/PunycodeTest.php @@ -4,17 +4,6 @@ class PunycodeTest extends \PHPUnit_Framework_TestCase { - /** - * Make sure the right internal encoding is defined when testing - * - */ - public function setUp() - { - parent::setUp(); - - mb_internal_encoding('utf-8'); - } - /** * Test encoding Punycode * @@ -125,6 +114,10 @@ public function domainNamesProvider() 'guangdong.广东', 'guangdong.xn--xhq521b', ), + array( + 'gwóźdź.pl', + 'xn--gwd-hna98db.pl', + ), ); } }