From b47b6f496d0c89f98bc71293457a52610c783e7a Mon Sep 17 00:00:00 2001
From: Bernd Oliver Suenderhauf <bos@suenderhauf.de>
Date: Thu, 23 May 2019 15:24:28 +0200
Subject: [PATCH] Issue #3056207 by Pancho: Use pdftk dump_data_fields_utf8

---
 .../FillPdfBackend/PdftkFillPdfBackend.php      | 17 ++++-------------
 tests/src/Functional/PdfParseTest.php           |  8 +++-----
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php b/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php
index 68f71aa..0c4ea30 100644
--- a/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php
+++ b/src/Plugin/FillPdfBackend/PdftkFillPdfBackend.php
@@ -2,7 +2,6 @@
 
 namespace Drupal\fillpdf\Plugin\FillPdfBackend;
 
-use Drupal\Component\Utility\Html;
 use Drupal\Core\File\FileSystem;
 use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
 use Drupal\Core\StringTranslation\StringTranslationTrait;
@@ -116,15 +115,15 @@ class PdftkFillPdfBackend implements FillPdfBackendPluginInterface, ContainerFac
 
     // Use exec() to call pdftk (because it will be easier to go line-by-line
     // parsing the output) and pass $content via stdin. Retrieve the fields with
-    // dump_data_fields().
+    // pdftk's dump_data_fields_utf8 operation.
     $output = [];
-    exec("{$pdftk_path} {$template_path} dump_data_fields", $output, $status);
+    exec("{$pdftk_path} {$template_path} dump_data_fields_utf8", $output, $status);
     if (count($output) === 0) {
       \Drupal::messenger()->addWarning($this->t('PDF does not contain fillable fields.'));
       return [];
     }
 
-    // Build a simple map of dump_data_fields keys to our own array keys.
+    // Build a simple map of dump_data_fields_utf8 keys to our own array keys.
     $data_fields_map = [
       'FieldType' => 'type',
       'FieldName' => 'name',
@@ -144,16 +143,8 @@ class PdftkFillPdfBackend implements FillPdfBackendPluginInterface, ContainerFac
       // Separate the data key from the data value.
       list($key, $value) = explode(':', $lineitem);
       if (in_array($key, array_keys($data_fields_map), NULL)) {
-        // pdftk sometimes inserts random &#0; markers - strip these out.
-        // NOTE: This may break forms that actually DO contain this pattern,
-        // but 99%-of-the-time functionality is better than merge failing due
-        // to improper parsing.
-        $value = str_replace('&#0;', '', $value);
-        // pdftk encodes diacritics as XML numerical entities, e.g. 'ü' is
-        // represented by &#195;&#188; We need to reverse this.
-        $decoded = Html::decodeEntities($value);
         // Trim spaces.
-        $fields[$fieldindex][$data_fields_map[$key]] = trim($decoded);
+        $fields[$fieldindex][$data_fields_map[$key]] = trim($value);
       }
     }
 
diff --git a/tests/src/Functional/PdfParseTest.php b/tests/src/Functional/PdfParseTest.php
index d10a67e..998d335 100644
--- a/tests/src/Functional/PdfParseTest.php
+++ b/tests/src/Functional/PdfParseTest.php
@@ -2,7 +2,6 @@
 
 namespace Drupal\Tests\fillpdf\Functional;
 
-use Drupal\Component\Utility\Html;
 use Drupal\Core\Url;
 use Drupal\Tests\BrowserTestBase;
 use Drupal\Tests\fillpdf\Traits\TestFillPdfTrait;
@@ -110,12 +109,11 @@ class PdfParseTest extends BrowserTestBase {
     $backend = $backend_manager->createInstance('pdftk', $config->get());
 
     // Reparse the sample PDF file and check for each text field that the
-    // HTML-decoded value equals the HTML-decoded name.
+    // field value equals the field name.
     foreach ($backend->parse($fillpdf_form) as $field) {
       if ($field['type'] == 'Text') {
-        $name = Html::decodeEntities($field['name']);
-        $value = isset($field['value']) ? Html::decodeEntities($field['value']) : NULL;
-        static::assertEquals($name, $value);
+        $value = isset($field['value']) ? $field['value'] : NULL;
+        static::assertEquals($field['name'], $value);
       }
     }
   }
-- 
GitLab