Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 115 additions & 11 deletions src/Service/FileService.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use Psr\Log\LoggerInterface;
use App\Entity\Gateway as Source;
use Smalot\PdfParser\Parser;
use PhpOffice\PhpWord\IOFactory;

/**
* Service responsible for woo files.
Expand Down Expand Up @@ -196,6 +197,64 @@ public function generateDownloadEndpoint(string $id, Endpoint $downloadEndpoint)
}//end generateDownloadEndpoint()


/**
 * Writes the decoded file contents to a temporary file for short use.
 *
 * Don't forget to unlink($tempFilePath) after using the file to remove the temporary file.
 *
 * Note: tempnam() uses $fileExtension as a file name prefix, so the created
 * file does not actually end with that extension.
 *
 * @param File   $file          File the contents belong to (only its name is used, for error messages).
 * @param string $fileExtension Prefix passed to tempnam() for the temporary file name.
 * @param string $base64Decoded File contents in decoded form.
 *
 * @return string|null The temporary file path, or null when the file could not be created or written.
 */
private function createTemporaryFile(File $file, string $fileExtension, $base64Decoded): ?string
{
    $tempFilePath = tempnam(sys_get_temp_dir(), $fileExtension);
    if ($tempFilePath === false) {
        $this->logger->error('Failed to create a temporary file '.$file->getName());
        $this->style && $this->style->error('Failed to create a temporary file '.$file->getName());

        return null;
    }

    if (file_put_contents($tempFilePath, $base64Decoded) === false) {
        // tempnam() already created the file; remove it so a failed write
        // does not leave an empty or partial file behind for the caller to parse.
        unlink($tempFilePath);

        $this->logger->error('Failed to write to a temporary file '.$file->getName());
        $this->style && $this->style->error('Failed to write to a temporary file '.$file->getName());

        return null;
    }

    return $tempFilePath;

}//end createTemporaryFile()


/**
 * Extracts text from a docx file.
 *
 * The decoded contents are written to a temporary file, loaded through
 * PhpWord's IOFactory and the text of every section is collected.
 * Exceptions thrown while loading or traversing the document are not caught
 * here; the temporary file is removed in either case.
 *
 * @param File   $file          File to get text from.
 * @param string $base64Decoded File contents in decoded form.
 *
 * @return string The extracted text, or an empty string when no temporary file could be created.
 */
private function getTextFromDocx(File $file, $base64Decoded): string
{
    $tempFilePath = $this->createTemporaryFile($file, 'docx', $base64Decoded);
    if ($tempFilePath === null) {
        return '';
    }

    try {
        $phpWord = IOFactory::load($tempFilePath);

        $text = '';
        foreach ($phpWord->getSections() as $section) {
            // processElements() returns the text it was given extended with the
            // newly found text, so assign the result instead of appending it
            // (appending would duplicate everything collected so far).
            $text = $this->processElements($section->getElements(), $text);
        }
    } finally {
        // Remove temp file, also when loading or parsing the document failed.
        unlink($tempFilePath);
    }

    return $text;

}//end getTextFromDocx()


/**
* Extracts text from a document (File).
*
Expand All @@ -211,20 +270,29 @@ public function getTextFromDocument(Value $value): ?string
return null;
}

switch ($file->getMimeType()) {
case 'pdf':
case 'application/pdf':
try {
$pdf = $this->pdfParser->parseContent(\Safe\base64_decode($file->getBase64()));
$text = $pdf->getText();
} catch (\Exception $e) {
$this->logger->error('Something went wrong extracting text from '.$file->getName().' '.$e->getMessage());
$this->style && $this->style->error('Something went wrong extracting text from '.$file->getName().' '.$e->getMessage());
$base64Decoded = \Safe\base64_decode($file->getBase64());

try {
switch ($file->getMimeType()) {
case 'pdf':
case 'application/pdf':
$pdf = $this->pdfParser->parseContent($base64Decoded);
$text = $pdf->getText();
break;
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
$text = $this->getTextFromDocx($file, $base64Decoded);
break;
default:
$text = null;
}
break;
default:
} catch (\Exception $e) {
$this->logger->error('Something went wrong extracting text from '.$file->getName().' '.$e->getMessage());
$this->style && $this->style->error('Something went wrong extracting text from '.$file->getName().' '.$e->getMessage());

$text = null;
}

if (empty($text) === true) {
$text = null;
}

Expand All @@ -233,6 +301,42 @@ public function getTextFromDocument(Value $value): ?string
}//end getTextFromDocument()


/**
 * Loops through docx elements and collects their text.
 *
 * TextRun and Cell elements are processed recursively through getElements(),
 * Table elements through the cells of each row, and Text elements contribute
 * their own text. Elements of any other class are ignored. Text fragments are
 * concatenated without a separator.
 *
 * @param iterable $elements Docx elements.
 * @param string   $text     Text collected so far; the extracted text is appended to it.
 *
 * @return string The given $text extended with the text found in $elements.
 */
private function processElements($elements, string $text): string
{
    foreach ($elements as $element) {
        switch (get_class($element)) {
            case 'PhpOffice\PhpWord\Element\TextRun':
            case 'PhpOffice\PhpWord\Element\Cell':
                // The recursive call already returns $text plus the nested text,
                // so assign it; appending would duplicate the text collected so far.
                $text = $this->processElements($element->getElements(), $text);
                break;

            case 'PhpOffice\PhpWord\Element\Table':
                foreach ($element->getRows() as $row) {
                    foreach ($row->getCells() as $cell) {
                        $text = $this->processElements($cell->getElements(), $text);
                    }
                }
                break;

            case 'PhpOffice\PhpWord\Element\Text':
                $text .= $element->getText();
                break;
        }
    }

    return $text;

}//end processElements()


/**
* Returns the data from an document as a response.
*
Expand Down
3 changes: 3 additions & 0 deletions src/Service/SyncOpenWooService.php
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ public function syncOpenWooHandler(array $data, array $configuration): array
$mappedResult = $this->mappingService->mapping($mapping, $result);
// Map categories to prevent multiple variants of the same category.
$mappedResult = $this->mappingService->mapping($categorieMapping, $mappedResult);
if (isset($mappedResult['samenvatting']) === true) {
$mappedResult['samenvatting'] = html_entity_decode($mappedResult['samenvatting']);
}

$validationErrors = $this->validationService->validateData($mappedResult, $schema, 'POST');
if ($validationErrors !== null) {
Expand Down
3 changes: 3 additions & 0 deletions src/Service/SyncXxllncCasesService.php
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,9 @@ public function syncXxllncCasesHandler(array $data, array $configuration): array
$mappedResult = $this->mappingService->mapping($mapping, $result);
// Map categories to prevent multiple variants of the same category.
$mappedResult = $this->mappingService->mapping($categorieMapping, $mappedResult);
if (isset($mappedResult['samenvatting']) === true) {
$mappedResult['samenvatting'] = html_entity_decode($mappedResult['samenvatting']);
}

$validationErrors = $this->validationService->validateData($mappedResult, $schema, 'POST');
if ($validationErrors !== null) {
Expand Down