Commit 8813bbcc authored by M Parker

Add a preprocessor that links values in the same row, grouping duplicates.

parent 467755da
{
"name": "drupal/d3_sankey_table_group_pp",
"description": "Provides a Sankey preprocessor that links values in the same row, grouping duplicates.",
"type": "drupal-module",
"require-dev": {
"phpunit/phpunit": "5.5.*"
},
"license": "GPL-2.0+",
"require": {}
}
This diff is collapsed.
name = D3 Sankey: Table grouping preprocessor
description = Provides a Sankey preprocessor that links values in the same row, grouping duplicates.
core = 7.x
package = Visualization
dependencies[] = d3_sankey
dependencies[] = composer_manager
dependencies[] = xautoload (>= 7.x-5.0)
<?php
/**
* @file
* Hooks and helper functions for the d3_sankey_table_group_pp module.
*/
/* Hooks. */
/**
 * Implements hook_xautoload().
 *
 * Registers the PSR-4 namespace Drupal\d3_sankey_table_group_pp so that it
 * maps onto this module's src directory.
 *
 * @param object $adapter
 *   The adapter object that xautoload passes to this hook. Only its absolute()
 *   and addPsr4() methods are used here.
 */
function d3_sankey_table_group_pp_xautoload($adapter) {
  // According to the xautoload documentation, absolute() switches the adapter
  // to absolute paths, so a bare 'src' would be resolved against the current
  // working directory rather than this module. Anchor the path to the
  // directory containing this file instead.
  $adapter->absolute()->addPsr4('Drupal\d3_sankey_table_group_pp', __DIR__ . '/src');
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  PHPUnit configuration for this module.

  - Loads tests/bootstrap.php before any test runs.
  - Converts PHP errors, notices and warnings into exceptions, so that any of
    them fails the test that triggered it.
  - Defines a single suite, "Unit", made up of the tests found under
    ./tests/src/Unit.
-->
<phpunit backupGlobals="false"
backupStaticAttributes="false"
colors="true"
convertErrorsToExceptions="true"
convertNoticesToExceptions="true"
convertWarningsToExceptions="true"
processIsolation="false"
stopOnFailure="false"
syntaxCheck="false"
bootstrap="tests/bootstrap.php"
>
<testsuites>
<testsuite name="Unit">
<directory>./tests/src/Unit</directory>
</testsuite>
</testsuites>
</phpunit>
<?php
namespace Drupal\d3_sankey_table_group_pp\Model;
use Drupal\d3_sankey\Model\Link;
/**
 * A Sankey link whose endpoints are identified by string keys.
 *
 * The source and target are stored as strings (array keys of nodes) rather
 * than as numeric indices.
 */
class KeyedLink extends Link {

  /**
   * The array key of the node this link starts at.
   *
   * @var string
   */
  public $source;

  /**
   * The array key of the node this link ends at.
   *
   * @var string
   */
  public $target;

  /**
   * The size (weight) of the link.
   *
   * @var int|float
   */
  public $value;

  /**
   * KeyedLink constructor.
   *
   * @param string $source
   *   The array key of the node to connect from; cast to a string.
   * @param string $target
   *   The array key of the node to connect to; cast to a string.
   * @param int|float $value
   *   A number representing how big the link is. Any non-numeric value is
   *   replaced by 1.
   */
  public function __construct($source, $target, $value = 1) {
    $this->source = (string) $source;
    $this->target = (string) $target;

    // Fall back to a weight of 1 unless a usable number was supplied.
    $weight = 1;
    if (is_numeric($value)) {
      // Adding zero turns a numeric string into an int or a float and leaves
      // real numbers untouched.
      $weight = $value + 0;
    }
    $this->value = $weight;
  }

}
<?php
namespace Drupal\d3_sankey_table_group_pp;
use Drupal\d3_sankey\DrupalCoreAdapter;
use Drupal\d3_sankey\Model\Link;
use Drupal\d3_sankey\Model\Node;
use Drupal\d3_sankey\Model\RawSankeyData;
use Drupal\d3_sankey\PreprocessorInterface;
use Drupal\d3_sankey_table_group_pp\Model\KeyedLink;
/**
 * A data preprocessor that links values in the same row, grouping duplicates.
 *
 * This preprocessor considers each cell in the table to be a node, and adds a
 * link between each pair of adjacent columns in the same row. If any two cells
 * can be represented by the same string, they are considered to be the same
 * node. If the same connection appears more than once, the existing link's
 * value is incremented instead of a second link being added.
 *
 * To use an example, it would transform data like...
 *
 * Account | LoB | Client
 * --------|-----|--------
 * Revenue | SLA | One
 * Revenue | NW  | One
 * Revenue | SLA | Two
 *
 * ... into the following list of nodes...
 *
 * 1. Revenue
 * 2. SLA
 * 3. One
 * 4. NW
 * 5. Two
 *
 * ... and the following list of links...
 *
 * 1. Revenue -> SLA (value 2, because it appears in two rows)
 * 2. SLA -> One
 * 3. Revenue -> NW
 * 4. NW -> One
 * 5. SLA -> Two
 *
 * ... which would result in a Sankey diagram (roughly) like...
 *
 * ```
 * Revenue --- NW ---- One
 *        \          /
 *         --- SLA ----- Two
 * ```
 */
class TableGroupingPreprocessor implements PreprocessorInterface {

  /**
   * A wrapper around Drupal core functions.
   *
   * @var \Drupal\d3_sankey\DrupalCoreAdapter
   */
  private $adapter;

  /**
   * An associative array of nodes that make up this Sankey diagram.
   *
   * @var \Drupal\d3_sankey\Model\Node[]
   */
  private $nodes;

  /**
   * An associative array of links that make up this Sankey diagram.
   *
   * @var \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[]
   */
  private $links;

  /**
   * TableGroupingPreprocessor constructor.
   *
   * @param \Drupal\d3_sankey\Model\Node[] $nodes
   *   An associative array of nodes that make up this Sankey diagram.
   * @param \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[] $links
   *   An associative array of links that make up this Sankey diagram.
   * @param \Drupal\d3_sankey\DrupalCoreAdapter $adapter
   *   A wrapper around Drupal core functions. A new DrupalCoreAdapter is
   *   created if none is given.
   */
  public function __construct($nodes = array(), $links = array(), DrupalCoreAdapter $adapter = NULL) {
    $this->nodes = $nodes;
    $this->links = $links;
    $this->adapter = ($adapter) ? $adapter : new DrupalCoreAdapter();
  }

  /**
   * Get the associative array of nodes that make up this Sankey diagram.
   *
   * @return \Drupal\d3_sankey\Model\Node[]
   *   An associative array of nodes that make up this Sankey diagram.
   */
  public function getNodes() {
    return $this->nodes;
  }

  /**
   * Get the associative array of links that make up this Sankey diagram.
   *
   * @return \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[]
   *   An associative array of links that make up this Sankey diagram.
   */
  public function getLinks() {
    return $this->links;
  }

  /**
   * Set the associative array of nodes that make up this Sankey diagram.
   *
   * @param \Drupal\d3_sankey\Model\Node[] $nodes
   *   An associative array of nodes that make up this Sankey diagram.
   */
  public function setNodes($nodes) {
    $this->nodes = $nodes;
  }

  /**
   * Set the associative array of links that make up this Sankey diagram.
   *
   * @param \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[] $links
   *   An associative array of links that make up this Sankey diagram.
   */
  public function setLinks($links) {
    $this->links = $links;
  }

  /**
   * Load a row of data into the table.
   *
   * Every cell becomes a node (unless a node with the same key already
   * exists), and every pair of adjacent cells becomes a link. A connection
   * that has been seen before increments the existing link's value.
   *
   * @param array $row
   *   A row of data. Anything that is not an array is cast to one.
   */
  public function ingestRow($row) {
    $previous_node_key = NULL;

    // Ensure this row is an array.
    $row = (array) $row;

    // Loop through each column in the row...
    foreach ($row as $current_node_label) {
      $current_node_key = self::generateNodeKey($current_node_label);

      // If the current node doesn't exist in $this->nodes yet, add it to the
      // nodes array.
      if (!array_key_exists($current_node_key, $this->nodes)) {
        $this->nodes[$current_node_key] = new Node($current_node_label, NULL, $this->adapter);
      }

      // If there is a reference to the previous node in this row (i.e.: if this
      // is not the first column in the row), add a link from the previous node
      // to the current node.
      // NOTE(review): two adjacent cells with the same label produce a link
      // from a node to itself; confirm that the renderer tolerates that.
      if (!is_null($previous_node_key)) {
        $link_key = self::generateLinkKey($previous_node_key, $current_node_key);

        // If the current link doesn't exist in $this->links yet, add it.
        if (!array_key_exists($link_key, $this->links)) {
          $this->links[$link_key] = new KeyedLink($previous_node_key, $current_node_key);
        }
        // If it does exist, increment the weight.
        else {
          $this->links[$link_key]->value++;
        }
      }

      // Keep track of this node for the next iteration through the loop.
      $previous_node_key = $current_node_key;
    }
  }

  /**
   * {@inheritdoc}
   *
   * Links whose source or target does not match the key of any known node are
   * left out of the output and reported with an E_USER_WARNING, because there
   * is no node index they could legitimately point to.
   */
  public function getRawData() {
    // This class uses keys in $this->nodes as an implementation detail, so
    // that we can identify duplicates. But we must not pass that implementation
    // detail to RawSankeyData, so re-index the nodes numerically (preserving
    // their order) and remember which numeric index each key was given.
    $nodes = array_values($this->nodes);
    $nodes_mapping = array_flip(array_keys($this->nodes));

    // This class uses an associative array of KeyedLinks instead of a numeric
    // array of Links as an implementation detail, so we can identify links
    // between nodes, and ensure we don't enter duplicates. But we must not pass
    // that implementation detail to RawSankeyData.
    $links = array();
    foreach ($this->links as $link) {
      $source_key = (string) $link->source;
      $target_key = (string) $link->target;

      // A link that refers to an unknown node cannot be translated to numeric
      // indices. Casting the missing lookup to an integer would silently attach
      // the link to node 0, so skip it and make the problem visible instead.
      if (!isset($nodes_mapping[$source_key], $nodes_mapping[$target_key])) {
        trigger_error(sprintf('Skipping Sankey link from "%s" to "%s": unknown node key.', $source_key, $target_key), E_USER_WARNING);
        continue;
      }

      // Add a regular Link with the numeric indices along with the current
      // link's value.
      $links[] = new Link($nodes_mapping[$source_key], $nodes_mapping[$target_key], $link->value);
    }

    return new RawSankeyData($nodes, $links);
  }

  /**
   * Generate a key for the Nodes array, given a node label.
   *
   * @param string $node_label
   *   A label for a node.
   *
   * @return string
   *   A key for the array of nodes.
   */
  private static function generateNodeKey($node_label) {
    // In theory, this function could hash the key; however, for simplicity, it
    // currently just uses the label as the key. This could cause problems if
    // the node label is a string longer than PHP's max size for array key
    // strings (e.g.: if we were trying to use BLOBs as node labels).
    return (string) $node_label;
  }

  /**
   * Generate a key for the Links array, given source and target node keys.
   *
   * @param string $source_key
   *   A key for the array of nodes.
   * @param string $target_key
   *   A key for the array of nodes.
   *
   * @return string
   *   A key for the array of links, unique to this (source, target) pair.
   */
  private static function generateLinkKey($source_key, $target_key) {
    // Joining the two keys with nothing but a separator is ambiguous when a
    // key contains the separator itself: ('a---b', 'c') and ('a', 'b---c')
    // would share a key and be merged into one link. Prefixing the byte length
    // of the source key fixes where the source ends, so each pair gets its own
    // key.
    // In theory, this function could hash the keys instead; this could cause
    // problems if the combined node labels are a string longer than PHP's max
    // size for array key strings (e.g.: if we were trying to use BLOBs as node
    // labels).
    $source_key = (string) $source_key;
    return strlen($source_key) . ':' . $source_key . '---' . (string) $target_key;
  }

}
<?php
/**
* @file
* Bootstrap PHPUnit tests.
*
* Since PHPUnit doesn't bootstrap Drupal, and we don't have any way of knowing
* where the xautoload module is in relation to this module, we have to manually
* include the files we need.
*/
require_once __DIR__ . '/../../../src/DrupalCoreAdapter.php';
require_once __DIR__ . '/../../../src/PreprocessorInterface.php';
require_once __DIR__ . '/../../../src/Model/Link.php';
require_once __DIR__ . '/../../../src/Model/Node.php';
require_once __DIR__ . '/../../../src/Model/RawSankeyData.php';
require_once __DIR__ . '/../src/Model/KeyedLink.php';
require_once __DIR__ . '/../src/TableGroupingPreprocessor.php';
<?php
namespace Drupal\Tests\D3SankeyGroupingPreprocess;
use Drupal\d3_sankey\DrupalCoreAdapter;
use Drupal\d3_sankey\Model\Link;
use Drupal\d3_sankey\Model\Node;
use Drupal\d3_sankey\Model\RawSankeyData;
use Drupal\d3_sankey_table_group_pp\TableGroupingPreprocessor;
use Drupal\d3_sankey_table_group_pp\Model\KeyedLink;
use PHPUnit\Framework\TestCase;
/**
 * Tests for \Drupal\d3_sankey_table_group_pp\TableGroupingPreprocessor.
 */
class TableGroupingPreprocessorTest extends TestCase {

  /**
   * A test double for a wrapper around Drupal core functions.
   *
   * @var \PHPUnit_Framework_MockObject_MockObject
   */
  private $mockAdapter;

  /**
   * {@inheritdoc}
   */
  protected function setUp() {
    // Create a test double for the wrapper around Drupal core functions.
    $this->mockAdapter = $this->createMock(DrupalCoreAdapter::class);

    // Mock the drupalHtmlId() method that returns the string it was passed.
    $this->mockAdapter->method('drupalHtmlId')->willReturnArgument(0);
  }

  /**
   * Test that the preprocessor will return raw data in the correct format.
   */
  public function testGetRawData() {
    // Prepare some test data.
    // Note that we're specifically _not_ testing the implementation details of
    // generateNodeKey() and generateLinkKey() by using keys that would not
    // normally be generated by those functions.
    $input_nodes = array(
      'test0' => new Node('foo', 'baz', $this->mockAdapter),
      'test1' => new Node('bar', NULL, $this->mockAdapter),
    );
    $input_links = array(
      'fizz0' => new KeyedLink('test0', 'test1', 314),
    );

    // Initialize the system under test.
    $sut = new TableGroupingPreprocessor(array(), array(), $this->mockAdapter);
    $sut->setNodes($input_nodes);
    $sut->setLinks($input_links);

    // Run the system under test.
    $output = $sut->getRawData();

    // Ensure the output is what we expect.
    $this->assertInstanceOf(RawSankeyData::class, $output,
      'The raw output is a RawSankeyData.');

    // Transform the output data to make it easier for us to run assertions.
    $output_nodes = $output->getNodes();
    $output_links = $output->getLinks();

    // Ensure the nodes are what we expect.
    $this->assertCount(2, $output_nodes,
      'There are 2 nodes in the output because there were 2 nodes in the input.');
    $this->assertArrayNotHasKey('test0', $output_nodes,
      'The test0 key is not in the node array output by getRawData().');
    $this->assertInstanceOf(Node::class, $output_nodes[0],
      'The first thing in the node output array is a Node.');
    $this->assertEquals('foo', $output_nodes[0]->name,
      'The name of the first output node is foo.');
    $this->assertEquals('baz', $output_nodes[0]->id,
      'The id of the first output node is baz.');
    $this->assertArrayNotHasKey('test1', $output_nodes,
      'The test1 key is not in the node array output by getRawData().');
    $this->assertInstanceOf(Node::class, $output_nodes[1],
      'The second thing in the node output array is a Node.');
    $this->assertEquals('bar', $output_nodes[1]->name,
      'The name of the second output node is bar.');

    // Ensure the links are what we expect.
    $this->assertCount(1, $output_links,
      'There is 1 link in the output because there was 1 link in the input.');
    $this->assertArrayNotHasKey('fizz0', $output_links,
      'The fizz0 key is not in the link array output by getRawData().');
    $this->assertInstanceOf(Link::class, $output_links[0],
      'The first thing in the output array of links is a Link or one of its sub-types.');
    $this->assertNotInstanceOf(KeyedLink::class, $output_links[0],
      'The first thing in the output array of links is not a KeyedLink.');
    $this->assertEquals(0, $output_links[0]->source,
      'The source of the first output link is 0.');
    $this->assertEquals(1, $output_links[0]->target,
      'The target of the first output link is 1.');
    $this->assertEquals(314, $output_links[0]->value,
      'The value of the first output link is 314.');
  }

  /**
   * Test that the preprocessor will ingest rows of data properly.
   */
  public function testIngestRow() {
    // Initialize the system under test.
    $sut = new TableGroupingPreprocessor(array(), array(), $this->mockAdapter);

    // Prepare some test data.
    $input_data_0 = array(
      array('Revenue', 'Service level agreements', 'Client 1'),
      array('Revenue', 'New work', 'Client 1'),
      array('Revenue', 'Tech support', 'Client 1'),
    );

    // Run the system under test.
    foreach ($input_data_0 as $row) {
      $sut->ingestRow($row);
    }

    // Ensure the output is what we expect.
    $this->assertCount(5, $sut->getNodes(),
      'There are 5 unique nodes, because there were 5 unique strings in the initial ingested data.');
    $this->assertCount(6, $sut->getLinks(),
      'There are 6 unique links, because there were 6 unique connections in the initial ingested data.');
    $this->assertEquals(6, self::sumLinkValues($sut->getLinks()),
      'The link values add up to 6, because each of the 6 connections in the initial ingested data appeared once.');

    // Prepare some additional test data.
    $input_data_1 = array(
      array('Revenue', 'Service level agreements', 'Client 2'),
      array('Revenue', 'New work', 'Client 2'),
      array('Revenue', 'Tech support', 'Client 2'),
    );

    // Run the system under test again.
    foreach ($input_data_1 as $row) {
      $sut->ingestRow($row);
    }

    // Ensure the new output is what we expect.
    $this->assertCount(6, $sut->getNodes(),
      'There are 6 unique nodes, because there were 6 unique strings in the combined ingested data.');
    $this->assertCount(9, $sut->getLinks(),
      'There are 9 unique links, because there were 9 unique connections in the combined ingested data.');
    // The three links out of Revenue were each seen a second time, so their
    // values must have been incremented rather than new links being added.
    $this->assertEquals(12, self::sumLinkValues($sut->getLinks()),
      'The link values add up to 12, because the combined ingested data contained 12 connections, 3 of them repeated.');
  }

  /**
   * Add up the values of a list of links.
   *
   * This deliberately ignores the array keys, so that the assertions using it
   * do not depend on how the preprocessor generates its link keys.
   *
   * @param \Drupal\d3_sankey_table_group_pp\Model\KeyedLink[] $links
   *   The links whose values should be added up.
   *
   * @return int|float
   *   The sum of the value property of every link.
   */
  private static function sumLinkValues(array $links) {
    $total = 0;
    foreach ($links as $link) {
      $total += $link->value;
    }
    return $total;
  }

}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment