TableGroupingPreprocessor.php 7.79 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
<?php

namespace Drupal\d3_sankey_table_group_pp;

use Drupal\d3_sankey\DrupalCoreAdapter;
use Drupal\d3_sankey\Model\Link;
use Drupal\d3_sankey\Model\Node;
use Drupal\d3_sankey\Model\RawSankeyData;
use Drupal\d3_sankey\PreprocessorInterface;
use Drupal\d3_sankey_table_group_pp\Model\KeyedLink;

/**
 * A data preprocessor that links values in the same row, grouping duplicates.
 *
 * This preprocessor considers each cell in the table to be a node, and adds a
 * link between each pair of adjacent columns in the same row. If any two cells
 * can be represented by the same string, they are considered to be the same
 * node. If the same link is seen more than once, its value is incremented
 * instead of a duplicate link being added.
 *
 * To use an example, it would transform data like...
 *
 * Account | LoB | Client
 * --------|-----|--------
 * Revenue | SLA | One
 * Revenue | NW  | One
 * Revenue | SLA | Two
 *
 * ... into the following list of nodes...
 *
 * 1. Revenue
 * 2. SLA
 * 3. One
 * 4. NW
 * 5. Two
 *
 * ... and the following list of links...
 *
 * 1. Revenue -> SLA (seen in two rows, so its value is incremented once)
 * 2. SLA -> One
 * 3. Revenue -> NW
 * 4. NW -> One
 * 5. SLA -> Two
 *
 * ... which would result in a Sankey diagram (roughly) like...
 *
 * ```
 * Revenue --- NW --- One
 *         \       /
 *          -- SLA -- Two
 * ```
 */
class TableGroupingPreprocessor implements PreprocessorInterface {

  /**
   * A wrapper around Drupal core functions.
   *
   * @var \Drupal\d3_sankey\DrupalCoreAdapter
   */
  private $adapter;

  /**
   * An associative array of nodes, keyed by node key.
   *
   * @var \Drupal\d3_sankey\Model\Node[]
   *
   * @see self::generateNodeKey()
   */
  private $nodes;

  /**
   * An associative array of links, keyed by link key.
   *
   * @var \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[]
   *
   * @see self::generateLinkKey()
   */
  private $links;

  /**
   * TableGroupingPreprocessor constructor.
   *
   * @param \Drupal\d3_sankey\Model\Node[] $nodes
   *   An associative array of nodes that make up this Sankey diagram.
   * @param \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[] $links
   *   An associative array of links that make up this Sankey diagram.
   * @param \Drupal\d3_sankey\DrupalCoreAdapter $adapter
   *   A wrapper around Drupal core functions. If omitted, a new
   *   DrupalCoreAdapter is created.
   */
  public function __construct($nodes = array(), $links = array(), DrupalCoreAdapter $adapter = NULL) {
    $this->nodes = $nodes;
    $this->links = $links;
    $this->adapter = ($adapter) ? $adapter : new DrupalCoreAdapter();
  }

  /**
   * Get the associative array of nodes that make up this Sankey diagram.
   *
   * @return \Drupal\d3_sankey\Model\Node[]
   *   An associative array of nodes that make up this Sankey diagram.
   */
  public function getNodes() {
    return $this->nodes;
  }

  /**
   * Get the associative array of links that make up this Sankey diagram.
   *
   * @return \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[]
   *   An associative array of links that make up this Sankey diagram.
   */
  public function getLinks() {
    return $this->links;
  }

  /**
   * Set the associative array of nodes that make up this Sankey diagram.
   *
   * @param \Drupal\d3_sankey\Model\Node[] $nodes
   *   An associative array of nodes that make up this Sankey diagram.
   */
  public function setNodes($nodes) {
    $this->nodes = $nodes;
  }

  /**
   * Set the associative array of links that make up this Sankey diagram.
   *
   * @param \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[] $links
   *   An associative array of links that make up this Sankey diagram.
   */
  public function setLinks($links) {
    $this->links = $links;
  }

  /**
   * Load a row of data into the table.
   *
   * Every cell becomes a node (unless a node with the same key already exists)
   * and every pair of adjacent cells becomes a link from the left cell to the
   * right cell. A link that already exists has its value incremented.
   *
   * @param array $row
   *   A row of data. Non-array values are cast to an array first.
   */
  public function ingestRow($row) {
    $previous_node_key = NULL;

    // Loop through each column in the row, making sure the row is an array.
    foreach ((array) $row as $current_node_label) {
      $current_node_key = self::generateNodeKey($current_node_label);

      // If the current node doesn't exist in $this->nodes yet, add it to the
      // nodes array.
      if (!array_key_exists($current_node_key, $this->nodes)) {
        $this->nodes[$current_node_key] = new Node($current_node_label, NULL, $this->adapter);
      }

      // If there is a reference to the previous node in this row (i.e.: if this
      // is not the first column in the row), add a link from the previous node
      // to the current node.
      if (!is_null($previous_node_key)) {
        $link_key = self::generateLinkKey($previous_node_key, $current_node_key);

        // If the current link doesn't exist in $this->links yet, add it.
        if (!array_key_exists($link_key, $this->links)) {
          $this->links[$link_key] = new KeyedLink($previous_node_key, $current_node_key);
        }
        // If it does exist, increment the weight.
        else {
          $this->links[$link_key]->value++;
        }
      }

      // Keep track of this node for the next iteration through the loop.
      $previous_node_key = $current_node_key;
    }
  }

  /**
   * {@inheritdoc}
   *
   * Links whose source or target key does not match any known node are left
   * out of the result and reported with an E_USER_WARNING, because they cannot
   * be translated into numeric node indices.
   */
  public function getRawData() {
    // TableGroupingPreprocessor uses keys in $this->nodes as an implementation
    // detail, so that we can identify duplicates. But we must not pass that
    // implementation detail to RawSankeyData, so re-index the nodes
    // numerically and remember which numeric index each key ended up at.
    $nodes = array_values($this->nodes);
    $nodes_mapping = array_flip(array_keys($this->nodes));
    $links = array();

    // TableGroupingPreprocessor uses an associative array of KeyedLinks
    // instead of a numeric array of Links as an implementation detail, so we
    // can identify links between nodes, and ensure we don't enter duplicates.
    // But we must not pass that implementation detail to RawSankeyData.
    foreach ($this->links as $link) {
      $source_key = (string) $link->source;
      $target_key = (string) $link->target;

      // A link can refer to a node that does not exist if the nodes and links
      // were supplied separately (constructor, setNodes(), setLinks()). Such a
      // link must not be silently attached to node 0, so skip it.
      if (!array_key_exists($source_key, $nodes_mapping) || !array_key_exists($target_key, $nodes_mapping)) {
        trigger_error(sprintf('Skipping Sankey link "%s" -> "%s": it refers to a node that does not exist.', $source_key, $target_key), E_USER_WARNING);
        continue;
      }

      // Add a regular Link with the numeric indices along with the current
      // link's value.
      $links[] = new Link($nodes_mapping[$source_key], $nodes_mapping[$target_key], $link->value);
    }

    return new RawSankeyData($nodes, $links);
  }

  /**
   * Generate a key for the Nodes array, given a node label.
   *
   * @param string $node_label
   *   A label for a node.
   *
   * @return string
   *   A key for the array of nodes.
   */
  private static function generateNodeKey($node_label) {
    // In theory, this function could hash the label; however, for simplicity,
    // it currently just uses the label (cast to a string) as the key. Very long
    // labels (e.g.: BLOBs) therefore produce equally long array keys.
    return (string) $node_label;
  }

  /**
   * Generate a key for the Links array, given source and target node keys.
   *
   * @param string $source_key
   *   A key for the array of nodes.
   * @param string $target_key
   *   A key for the array of nodes.
   *
   * @return string
   *   A key for the array of links.
   */
  private static function generateLinkKey($source_key, $target_key) {
    // In theory, this function could hash the keys; however, for simplicity, it
    // currently just joins the two keys with a '---' in the middle. Very long
    // labels therefore produce equally long array keys.
    //
    // Known limitation: because the separator is not escaped, two different
    // links can produce the same key if a node key itself contains '---' (e.g.:
    // 'a---b' -> 'c' and 'a' -> 'b---c' both become 'a---b---c'). The format is
    // kept as-is because link keys are visible through getLinks() and
    // setLinks().
    return (string) $source_key . '---' . (string) $target_key;
  }

}