From b47b6f496d0c89f98bc71293457a52610c783e7a Mon Sep 17 00:00:00 2001 From: Bernd Oliver Suenderhauf <bos@suenderhauf.de> Date: Thu, 23 May 2019 15:24:28 +0200 Subject: [PATCH] Issue #3056207 by Pancho: Use pdftk dump_data_fields_utf8 --- .../FillPdfBackend/PdftkFillPdfBackend.php | 17 ++++------------- tests/src/Functional/PdfParseTest.php | 8 +++----- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php b/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php index 68f71aa..0c4ea30 100644 --- a/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php +++ b/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php @@ -2,7 +2,6 @@ namespace Drupal\fillpdf\Plugin\FillPdfBackend; -use Drupal\Component\Utility\Html; use Drupal\Core\File\FileSystem; use Drupal\Core\Plugin\ContainerFactoryPluginInterface; use Drupal\Core\StringTranslation\StringTranslationTrait; @@ -116,15 +115,15 @@ class PdftkFillPdfBackend implements FillPdfBackendPluginInterface, ContainerFac // Use exec() to call pdftk (because it will be easier to go line-by-line // parsing the output) and pass $content via stdin. Retrieve the fields with - // dump_data_fields(). + // dump_data_fields_utf8(). $output = []; - exec("{$pdftk_path} {$template_path} dump_data_fields", $output, $status); + exec("{$pdftk_path} {$template_path} dump_data_fields_utf8", $output, $status); if (count($output) === 0) { \Drupal::messenger()->addWarning($this->t('PDF does not contain fillable fields.')); return []; } - // Build a simple map of dump_data_fields keys to our own array keys. + // Build a simple map of dump_data_fields_utf8 keys to our own array keys. $data_fields_map = [ 'FieldType' => 'type', 'FieldName' => 'name', @@ -144,16 +143,8 @@ class PdftkFillPdfBackend implements FillPdfBackendPluginInterface, ContainerFac // Separate the data key from the data value. 
list($key, $value) = explode(':', $lineitem); if (in_array($key, array_keys($data_fields_map), NULL)) { - // pdftk sometimes inserts random &#0; markers - strip these out. - // NOTE: This may break forms that actually DO contain this pattern, - // but 99%-of-the-time functionality is better than merge failing due - // to improper parsing. - $value = str_replace('&#0;', '', $value); - // pdftk encodes diacritics as XML numerical entities, e.g. 'ü' is - // represented by &#252; We need to reverse this. - $decoded = Html::decodeEntities($value); // Trim spaces. - $fields[$fieldindex][$data_fields_map[$key]] = trim($decoded); + $fields[$fieldindex][$data_fields_map[$key]] = trim($value); } } diff --git a/tests/src/Functional/PdfParseTest.php b/tests/src/Functional/PdfParseTest.php index d10a67e..998d335 100644 --- a/tests/src/Functional/PdfParseTest.php +++ b/tests/src/Functional/PdfParseTest.php @@ -2,7 +2,6 @@ namespace Drupal\Tests\fillpdf\Functional; -use Drupal\Component\Utility\Html; use Drupal\Core\Url; use Drupal\Tests\BrowserTestBase; use Drupal\Tests\fillpdf\Traits\TestFillPdfTrait; @@ -110,12 +109,11 @@ class PdfParseTest extends BrowserTestBase { $backend = $backend_manager->createInstance('pdftk', $config->get()); // Reparse the sample PDF file and check for each text field that the - // HTML-decoded value equals the HTML-decoded name. + // field value equals the field name. foreach ($backend->parse($fillpdf_form) as $field) { if ($field['type'] == 'Text') { - $name = Html::decodeEntities($field['name']); - $value = isset($field['value']) ? Html::decodeEntities($field['value']) : NULL; - static::assertEquals($name, $value); + $value = isset($field['value']) ? $field['value'] : NULL; + static::assertEquals($field['name'], $value); } } } -- GitLab