Skip to content
This repository was archived by the owner on Jul 25, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ composer require true/punycode:~1.0
// Import Punycode
use True\Punycode;

// Use UTF-8 as the encoding
mb_internal_encoding('utf-8');

$Punycode = new Punycode();
var_dump($Punycode->encode('renangonçalves.com'));
// outputs: xn--renangonalves-pgb.com
Expand All @@ -38,18 +35,10 @@ var_dump($Punycode->decode('xn--renangonalves-pgb.com'));

### 1. What is this library for?

This library converts a UTF-8 encoded domain name to an IDNA ASCII form and vice versa.


### 2. Do I need to use UTF-8?

Yes, domain names should be UTF-8 encoded.

If your application serves international users, you should already be using a Unicode charset.
Take your time to read [The Absolute Minimum Every Software Developer Must Know About Unicode](http://www.joelonsoftware.com/articles/Unicode.html).
This library converts a Unicode-encoded domain name to an IDNA ASCII form and vice versa.


### 3. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)?
### 2. Why should I use this instead of [PHP's IDN Functions](http://php.net/manual/en/ref.intl.idn.php)?

If you can compile the needed dependencies (intl, libidn) there is not much difference.
But if you want to write portable code between hosts (including Windows and Mac OS), or can't install PECL extensions, this is the right library for you.
25 changes: 21 additions & 4 deletions src/Punycode.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,23 @@ class Punycode
'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
);

/**
* Character encoding passed to the mb_* string functions (mb_strlen, mb_substr)
*
* @var string
*/
protected $encoding;

/**
* Constructor
*
* Stores the character encoding that this instance passes to the mb_*
* string functions (mb_strlen, mb_substr) when it processes domain names.
* The value is stored as given; no validation is performed here.
*
* @param string $encoding Character encoding of the input strings, 'UTF-8' by default
*/
public function __construct($encoding = 'UTF-8')
{
$this->encoding = $encoding;
}

/**
* Encode a domain to its Punycode version
*
Expand Down Expand Up @@ -94,7 +111,7 @@ protected function _encodePart($input)
sort($codePoints['nonBasic']);

$i = 0;
$length = mb_strlen($input);
$length = mb_strlen($input, $this->encoding);
while ($h < $length) {
$m = $codePoints['nonBasic'][$i++];
$delta = $delta + ($m - $n) * ($h + 1);
Expand Down Expand Up @@ -194,7 +211,7 @@ protected function _decodePart($input)
$bias = $this->_adapt($i - $oldi, ++$outputLength, ($oldi === 0));
$n = $n + (int) ($i / $outputLength);
$i = $i % ($outputLength);
$output = mb_substr($output, 0, $i) . $this->_codePointToChar($n) . mb_substr($output, $i, $outputLength - 1);
$output = mb_substr($output, 0, $i, $this->encoding) . $this->_codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

$i++;
}
Expand Down Expand Up @@ -260,9 +277,9 @@ protected function _codePoints($input)
'nonBasic' => array(),
);

$length = mb_strlen($input);
$length = mb_strlen($input, $this->encoding);
for ($i = 0; $i < $length; $i++) {
$char = mb_substr($input, $i, 1);
$char = mb_substr($input, $i, 1, $this->encoding);
$code = $this->_charToCodePoint($char);
if ($code < 128) {
$codePoints['all'][] = $codePoints['basic'][] = $code;
Expand Down
15 changes: 4 additions & 11 deletions tests/PunycodeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,6 @@
class PunycodeTest extends \PHPUnit_Framework_TestCase
{

/**
* Make sure mbstring's internal encoding is UTF-8 when testing
*/
public function setUp()
{
parent::setUp();

// mb_* calls made without an explicit encoding argument use the internal encoding
mb_internal_encoding('utf-8');
}

/**
* Test encoding Punycode
*
Expand Down Expand Up @@ -125,6 +114,10 @@ public function domainNamesProvider()
'guangdong.广东',
'guangdong.xn--xhq521b',
),
array(
'gwóźdź.pl',
'xn--gwd-hna98db.pl',
),
);
}
}