diff --git a/composer.json b/composer.json index bef2839..e5a9cc9 100644 --- a/composer.json +++ b/composer.json @@ -3,11 +3,13 @@ "type": "library", "description": "RebaseData API client for PHP", "require": { - "php": ">=7.2" + "php": ">=7.2", + "ext-curl": "*", + "ext-json": "*", + "ext-zip": "*" }, "require-dev": { - "phpunit/phpunit": "^8", - "ext-zip": "*" + "phpunit/phpunit": "^8" }, "suggest": { "ext-zip": "Allows compressed up- and downloads" diff --git a/src/Config/Config.php b/src/Config/Config.php index f7338ee..5622af3 100644 --- a/src/Config/Config.php +++ b/src/Config/Config.php @@ -17,7 +17,7 @@ class Config public function __construct() { $this->protocol = 'https'; - $this->host = 'www.rebasedata.com'; + $this->host = 'www.rebasedata.com/api/v1'; $this->cacheEnabled = false; $this->useZipIfAvailable = true; } @@ -86,6 +86,11 @@ public function getCacheEnabled() return $this->cacheEnabled; } + public function getUrl(): string + { + return $this->protocol . '://' . $this->host; + } + public function setCacheDirectory($cacheDirectory) { if (!is_dir($cacheDirectory)) { diff --git a/src/Converter/Converter.php b/src/Converter/Converter.php index 92902d9..1316f23 100644 --- a/src/Converter/Converter.php +++ b/src/Converter/Converter.php @@ -2,12 +2,13 @@ namespace RebaseData\Converter; +use Exception; use RebaseData\Config\Config; use RebaseData\Database\Database; use RebaseData\Database\Table\Table; use RebaseData\Exception\InvalidArgumentException; -use RebaseData\Exception\LogicException; use RebaseData\Exception\RuntimeException; +use RebaseData\InputFile\InputFile; use RebaseData\Service\CheckInputFilesService; use RebaseData\Service\CopyFilesService; use RebaseData\Service\DeleteDirectoryService; @@ -18,6 +19,8 @@ class Converter { + private const CHUNK_SIZE = 100 * (1024 ** 2); // 100MB + private $config; public function __construct(Config $config = null) @@ -122,220 +125,215 @@ public function convertAndSaveToZipFile(array $inputFiles, string $format, strin $this->doConversion($inputFiles, $format, $zipFilePath, null, $options); } - private function doConversion(array $inputFiles, string $format, ?string $zipFilePath, ?string $targetDirectory, array $options = []) - { + private function doConversion( + array $inputFiles, + string $format, + ?string $zipFilePath, + ?string $targetDirectory, + array $options = [] + ) { CheckInputFilesService::execute($inputFiles); if (!($zipFilePath !== null xor $targetDirectory !== null)) { - throw new InvalidArgumentException('You need to specify the ZIP file path or (exclusive or) the target directory path'); + throw new InvalidArgumentException( + 'You need to specify the ZIP file path or (exclusive or) the target directory path' + ); } if ($zipFilePath !== null and file_exists($zipFilePath)) { throw new InvalidArgumentException('Zip file must not exist yet: '.$zipFilePath); } - $options['outputFormat'] = $format; - - if ($this->config->getApiKey()) { - $options['customerKey'] = $this->config->getApiKey(); - } - - $queryString = ''; - if (count($options) > 0) { - $queryString = '?'.http_build_query($options); - } - - // Establish SSL connection - if ($this->config->getProtocol() === 'https') { - $address = 'ssl://'.$this->config->getHost().':443'; - } else { - $address = 'tcp://'.$this->config->getHost().':80'; - } - $socket = stream_socket_client($address, $errno, $errstr); - if ($socket === false) { - throw new RuntimeException('Cannot create socket: '.$errno.' '.$errstr); - } - stream_set_timeout($socket, 86400); + $dataSet = $this->createDataSet($inputFiles); - // Calculate the content length - $contentLength = count($inputFiles) * 8 * 2; + /** @var InputFile $inputFile */ foreach ($inputFiles as $inputFile) { - $contentLength += strlen(basename($inputFile->getName())); - $contentLength += filesize($inputFile->getPath()); - } + $dataSetFile = $this->createDataSetFile($dataSet, $inputFile); - // Build request headers - if ($zipFilePath !== null) { // We need to deliver a ZIP file - $acceptHeaderValue = 'application/zip'; - } else { // We need to deliver files in the target directory - if (extension_loaded('zip') and $this->config->getUseZipIfAvailable()) { - $acceptHeaderValue = 'application/zip'; - } else { - $acceptHeaderValue = 'application/rebasedata.v1'; - } + $this->uploadFile($dataSet, $dataSetFile, $inputFile); } - $requestHeaders = "POST /api/v1/convert$queryString HTTP/1.1\r\n"; - $requestHeaders .= "Host: ".$this->config->getHost()."\r\n"; - $requestHeaders .= "User-Agent: rebasedata/php-client\r\n"; - $requestHeaders .= "Content-Type: application/rebasedata.v1\r\n"; - $requestHeaders .= "Content-Length: $contentLength\r\n"; - $requestHeaders .= "Accept: $acceptHeaderValue\r\n"; - $requestHeaders .= "Connection: Close\r\n"; - $requestHeaders .= "\r\n"; - - // Send request headers - fwrite($socket, $requestHeaders); + $conversion = $this->startConversion($dataSet, $options, $format); - // Send request body - foreach ($inputFiles as $inputFile) { - $name = basename($inputFile->getName()); + $this->downloadConversion($conversion, $zipFilePath, $targetDirectory); + } - $nameLength = pack('J', strlen($name)); + private function createDataSet(array $inputFiles): array + { + $data = [ + 'name' => $this->generateDataSetNameFromFiles($inputFiles), + ]; - fwrite($socket, $nameLength); - fwrite($socket, $name); + $headers = ['Content-Type: application/json']; - $contentLength = pack('J', filesize($inputFile->getPath())); + return $this->sendRequest('/dataSets', 'POST', $data, $headers); + } - fwrite ($socket, $contentLength); + private function createDataSetFile(array $dataSet, InputFile $inputFile): array + { + $data = [ + 'name' => $inputFile->getName(), + 'size' => $inputFile->getSize(), + ]; - $inputFileHandle = fopen($inputFile->getPath(), 'r'); - while (!feof($inputFileHandle)) { - $chunk = fread($inputFileHandle, 2048); + $headers = ['Content-Type: application/json']; - fwrite($socket, $chunk); - } - fclose($inputFileHandle); - } + return $this->sendRequest(sprintf('/dataSets/%s/files', $dataSet['id']), 'POST', $data, $headers); + } - // Read response status line - $line = fgets($socket, 4096); - preg_match('#^HTTP/\d\.\d (\d+) #', $line, $matches); - if (!isset($matches[1])) { - throw new RuntimeException('Could not parse response status line: '.$line); - } - $responseCode = (int) $matches[1]; - if ($responseCode !== 200) { - throw new RuntimeException('Got invalid response code from API: '.$responseCode); + private function uploadFile(array $dataSet, array $dataSetFile, InputFile $inputFile): void + { + $headers = [ + 'Content-Type: application/octet-stream', + ]; + + $url = sprintf( + '/dataSets/%d/files/%d/chunks', + $dataSet['id'], + $dataSetFile['id'] + ); + + $file = fopen($inputFile->getPath(), 'rb'); + if (!$file) { + throw new RuntimeException('Could not open file: ' . $inputFile->getPath()); } - // Read response headers - $responseHeaders = []; - while (!feof($socket)) { - $line = rtrim(fgets($socket, 4096)); + try { + $chunkIndex = 0; + while (!feof($file)) { + $chunk = fread($file, self::CHUNK_SIZE); + if ($chunk === false) { + fclose($file); - if ($line === '') { - break; - } + throw new RuntimeException('Error reading file: ' . $inputFile->getPath()); + } - preg_match('/^([a-zA-Z0-9\-]+): (.*)$/', $line, $matches); - if (!isset($matches[1]) or !isset($matches[2])) { - throw new RuntimeException('Could not parse response header: '.$line); + $this->sendRequest($url . '/' . $chunkIndex, 'POST', $chunk, $headers); } - $responseHeaders[$matches[1]] = $matches[2]; - } + fclose($file); - // Check if we got an error back - if (isset($responseHeaders['Content-Type']) and - $responseHeaders['Content-Type'] === 'application/json') { - $responseData = fgets($socket); + $this->mergeFileChunks($dataSet, $dataSetFile); + } catch (Exception $exception) { + $this->deleteFile($dataSet, $dataSetFile); - $json = json_decode($responseData, true); - throw new RuntimeException('Got error from API: '.$json['error']); + throw $exception; } + } - // Handle response body - if ($zipFilePath !== null) { - // Read connection and write data to ZIP file + private function deleteFile(array $dataSet, array $dataSetFile): ?array + { + $headers = ['Content-Type: application/json']; + + return $this->sendRequest( + sprintf('/dataSets/%s/files/%s', $dataSet['id'], $dataSetFile['id']), + 'DELETE', + [], + $headers + ); + } - $zipFileHandle = fopen($zipFilePath, 'w'); - if ($zipFileHandle === false) { - throw new RuntimeException('Cannot open ZIP file: ' . $zipFilePath); - } + private function mergeFileChunks(array $dataSet, array $dataSetFile): array + { + $headers = ['Content-Type: application/json']; + + return $this->sendRequest( + sprintf('/dataSets/%s/files/%s/chunks/merge', $dataSet['id'], $dataSetFile['id']), + 'POST', + [], + $headers + ); + } - while (!feof($socket)) { - $chunk = fgets($socket, 2048); - fwrite($zipFileHandle, $chunk); - } + private function startConversion(array $dataSet, array $options, string $format): array + { + $options['outputFormat'] = $format; - fclose($socket); - fclose($zipFileHandle); - } else { - // We need to deliver the files to the target directory + $queryString = ''; + if (count($options) > 0) { + $queryString = '?' . http_build_query($options); + } - if ($responseHeaders['Content-Type'] === 'application/zip') { - // Receive zip file and extract it to target directory + $headers = ['Content-Type: application/json']; - if (!extension_loaded('zip')) { - throw new LogicException('Should not happen'); - } + return $this->sendRequest( + sprintf('/dataSets/%s/conversions' . $queryString, $dataSet['id']), + 'POST', + [], + $headers + ); + } - $workingDirectory = $this->config->getWorkingDirectory(); - $randomHash = GenerateRandomHash::execute(); + private function getConversionStatus(array $conversion): array + { + $headers = ['Content-Type: application/json']; - $temporaryZipFilePath = $workingDirectory.DIRECTORY_SEPARATOR.'convert-to-format-zip-'.$randomHash; + return $this->sendRequest(sprintf('/conversions/%s', $conversion['id']), 'GET', [], $headers); + } - $temporaryZipFileHandle = fopen($temporaryZipFilePath, 'w+'); - while (!feof($socket)) { - $chunk = fread($socket, 2048); + private function downloadConversion(array $conversion, ?string $zipFilePath, ?string $targetDirectory): void + { + do { + $result = $this->getConversionStatus($conversion); - fwrite($temporaryZipFileHandle, $chunk); - } - fclose($temporaryZipFileHandle); - fclose($socket); + $status = $result['status']; - $zipArchive = new ZipArchive(); - $zipArchive->open($temporaryZipFilePath); - $zipArchive->extractTo($targetDirectory); - $zipArchive->close(); + if ($status === 'error') { + throw new RuntimeException('Failed to convert database!'); + } - unlink($temporaryZipFilePath); + sleep(1); + } while ($result['status'] !== 'done'); - } else if ($responseHeaders['Content-Type'] === 'application/rebasedata.v1') { - // We need to deliver the files to the target directory. Since we don't have ext-zip, we - // receive a RebaseData-encoded binary response instead of a ZIP file. + $headers = [ + 'Content-Type: application/octet-stream', + ]; + $url = sprintf('/conversions/%s/chunks', $conversion['id']); + $totalChunksNumber = $result['downloadChunks']; - while (!feof($socket)) { - $nameLength = fread($socket, 8); - $nameLength = unpack('J', $nameLength); - $nameLength = $nameLength[1]; + if ($zipFilePath === null) { + $workingDirectory = $this->config->getWorkingDirectory(); + $randomHash = GenerateRandomHash::execute(); - $name = fread($socket, $nameLength); + $zipFilePath = $workingDirectory . DIRECTORY_SEPARATOR . 'convert-to-format-zip-' . $randomHash; + $needsToExtract = true; + } - if (strstr($name, '/') or strstr($name, '\\')) { - throw new InvalidArgumentException('Not allowed for security reasons'); - } + $file = fopen($zipFilePath, 'wb'); - $contentLength = fread($socket, 8); - $contentLength = unpack('J', $contentLength); - $contentLength = $contentLength[1]; + if (!$file) { + throw new RuntimeException('Could not create file: ' . $zipFilePath); + } - $outputFilePath = $targetDirectory.DIRECTORY_SEPARATOR.$name; + for ($chunkIndex = 0; $chunkIndex < $totalChunksNumber; $chunkIndex++) { + try { + $chunk = $this->sendRequest($url . '/' . $chunkIndex, 'GET', [], $headers); - $contentDone = 0; - $outputFileHandle = fopen($outputFilePath, 'w+'); - while ($contentDone < $contentLength) { - $toRead = $contentLength - $contentDone; - if ($toRead > 2048) { - $toRead = 2048; - } + if (!is_string($chunk) || empty($chunk)) { + throw new RuntimeException(sprintf('Chunk %s is empty or invalid.', $chunkIndex)); + } - $chunk = fread($socket, $toRead); + fwrite($file, $chunk); + } catch (Exception $exception) { + fclose($file); + unlink($zipFilePath); - fwrite($outputFileHandle, $chunk); + throw new RuntimeException( + sprintf('Failed to download chunk [%s]: %s', $chunkIndex, $exception->getMessage()) + ); + } + } - $contentDone += strlen($chunk); - } - fclose($outputFileHandle); - } + if (!empty($needsToExtract)) { + $zipArchive = new ZipArchive(); + $zipArchive->open($zipFilePath); + $zipArchive->extractTo($targetDirectory); + $zipArchive->close(); - fclose($socket); - } else { - throw new RuntimeException('Got response with invalid Content-Type header: '.$responseHeaders['Content-Type']); - } + unlink($zipFilePath); } + + fclose($file); } /** @@ -353,4 +351,81 @@ public function convertToFormatAndSaveAsZipFile(array $inputFiles, string $forma { $this->doConversion($inputFiles, $format, $zipFilePath, null, $options); } + + private function sendRequest( + string $endpoint, + string $method = 'GET', + $data = null, + array $headers = [], + bool $useAuthorization = true + ) + { + if ($useAuthorization) { + $headers[] = sprintf('api-key: %s', $this->config->getApiKey()); + } + + $ch = curl_init(); + + curl_setopt($ch, CURLOPT_URL, $this->config->getUrl() . $endpoint); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_HEADER, true); + + if (is_array($data)) { + $data = json_encode($data); + } + + switch (strtoupper($method)) { + case 'POST': + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $data); + break; + case 'PUT': + curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); + curl_setopt($ch, CURLOPT_POSTFIELDS, $data); + break; + case 'DELETE': + curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); + curl_setopt($ch, CURLOPT_POSTFIELDS, $data); + break; + case 'GET': + default: + break; + } + + if (!empty($headers)) { + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + } + + $response = curl_exec($ch); + $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); + $body = substr($response, $headerSize); + + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $contentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + + if ($response === false || !in_array($httpCode, [200, 201, 204])) { + $error = curl_error($ch); + curl_close($ch); + + throw new Exception(sprintf('HTTP status code: %s, Request Error: %s', $httpCode, $error)); + } + + curl_close($ch); + + if ($contentType === 'application/octet-stream') { + return $body; + } + + return json_decode($body, true); + } + + private function generateDataSetNameFromFiles(array $files): string + { + $fileNames = ''; + foreach ($files as $file) { + $fileNames .= $file->getName(); + } + + return 'dataset_' . md5($fileNames); + } } diff --git a/src/InputFile/InputFile.php b/src/InputFile/InputFile.php index 98b2c63..88597bd 100644 --- a/src/InputFile/InputFile.php +++ b/src/InputFile/InputFile.php @@ -8,8 +8,9 @@ class InputFile { private $path; private $name; + private $size; - public function __construct($path) + public function __construct(string $path) { if (!file_exists($path)) { throw new InvalidArgumentException('Path must exist: '.$path); @@ -17,14 +18,15 @@ public function __construct($path) $this->path = $path; $this->name = basename($path); + $this->size = (int)filesize($path); } - public function getPath() + public function getPath(): string { return $this->path; } - public function setName($name) + public function setName(string $name): void { if (empty($name)) { throw new InvalidArgumentException('Name must not be empty!'); @@ -33,8 +35,13 @@ public function setName($name) $this->name = $name; } - public function getName() + public function getName(): string { return $this->name; } + + public function getSize(): int + { + return $this->size; + } }