Skip to content
Snippets Groups Projects
Commit b21cd3e0 authored by Alex Barth
Browse files

#759904 lyricnz: Provide a Google Sitemap Parser.

parent 5a6d3e6e
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
Feeds 6.x 1.0 xxxxxxxxxxxxxxxxxxxx Feeds 6.x 1.0 xxxxxxxxxxxxxxxxxxxx
---------------------------------- ----------------------------------
- #759904 lyricnz: Provide a Google Sitemap Parser.
- #774858 rjbrown99: Fix Node Processor updates node "created" time when - #774858 rjbrown99: Fix Node Processor updates node "created" time when
updating. updating.
- #704236 jerdavis: Support mapping to CCK float field. - #704236 jerdavis: Support mapping to CCK float field.
......
...@@ -120,6 +120,16 @@ function _feeds_feeds_plugins() { ...@@ -120,6 +120,16 @@ function _feeds_feeds_plugins() {
), ),
); );
} }
$info['FeedsSitemapParser'] = array(
'name' => 'Sitemap parser',
'description' => 'Parse Sitemap XML format feeds.',
'handler' => array(
'parent' => 'FeedsParser',
'class' => 'FeedsSitemapParser',
'file' => 'FeedsSitemapParser.inc',
'path' => $path,
),
);
$info['FeedsNodeProcessor'] = array( $info['FeedsNodeProcessor'] = array(
'name' => 'Node processor', 'name' => 'Node processor',
'description' => 'Create nodes.', 'description' => 'Create nodes.',
......
<?php
// $Id$
/**
* A parser for the Sitemap specification http://www.sitemaps.org/protocol.php
*/
class FeedsSitemapParser extends FeedsParser {

  /**
   * Implementation of FeedsParser::parse().
   *
   * Reads the raw document from the batch, walks every <url> element of the
   * sitemap and adds one item per element to the batch. Each item always has
   * a 'url' key; 'lastmod', 'changefreq' and 'priority' are only present when
   * the corresponding element exists in the document.
   *
   * @param $batch
   *   The import batch: supplies the raw XML through getRaw() and receives
   *   the parsed items through addItem().
   * @param $source
   *   The source being imported. Not used by this parser.
   *
   * @throws Exception
   *   SimpleXMLElement's constructor throws when the raw data cannot be
   *   parsed as XML; the exception is not caught here.
   */
  public function parse(FeedsImportBatch $batch, FeedsSource $source) {
    // SimpleXML loads the whole document into memory, which is inefficient
    // for very large sitemaps, but will do for now.
    $xml = new SimpleXMLElement($batch->getRaw());
    foreach ($xml->url as $url) {
      // Every value is cast to a plain string before it is stored: property
      // access on a SimpleXMLElement yields another SimpleXMLElement, which
      // cannot be serialized and does not compare like a string.
      $item = array('url' => (string) $url->loc);
      if ($url->lastmod) {
        $lastmod = strtotime((string) $url->lastmod);
        // strtotime() returns FALSE for unparseable dates; leave the key out
        // rather than handing FALSE on as if it were a timestamp.
        if ($lastmod !== FALSE) {
          $item['lastmod'] = $lastmod;
        }
      }
      if ($url->changefreq) {
        $item['changefreq'] = (string) $url->changefreq;
      }
      if ($url->priority) {
        $item['priority'] = (string) $url->priority;
      }
      $batch->addItem($item);
    }
  }

  /**
   * Implementation of FeedsParser::getMappingSources().
   *
   * @return
   *   An array keyed by the item keys that parse() produces ('url',
   *   'lastmod', 'changefreq', 'priority'), each with a translated 'name'
   *   and 'description'.
   */
  public function getMappingSources() {
    return array(
      'url' => array(
        'name' => t('Item URL (link)'),
        'description' => t('URL of the feed item.'),
      ),
      'lastmod' => array(
        'name' => t('Last modification date'),
        'description' => t('Last modified date as UNIX time GMT of the feed item.'),
      ),
      'changefreq' => array(
        'name' => t('Change frequency'),
        'description' => t('How frequently the page is likely to change.'),
      ),
      'priority' => array(
        'name' => t('Priority'),
        'description' => t('The priority of this URL relative to other URLs on the site.'),
      ),
    );
  }
}
...@@ -841,3 +841,134 @@ class FeedsSyndicationParserTestCase extends FeedsWebTestCase { ...@@ -841,3 +841,134 @@ class FeedsSyndicationParserTestCase extends FeedsWebTestCase {
); );
} }
} }
/**
* Test Sitemap parser.
*/
class FeedsSitemapParserTestCase extends FeedsWebTestCase {

  /**
   * Describe this test.
   *
   * @return
   *   An array with the translated 'name', 'description' and 'group' of this
   *   test case.
   */
  public function getInfo() {
    return array(
      'name' => t('Sitemap parser'),
      'description' => t('Regression tests for Sitemap XML format parser.'),
      'group' => t('Feeds'),
    );
  }

  /**
   * Set up test.
   *
   * Enables the feeds, feeds_ui and ctools modules and logs in a user with
   * the 'administer feeds' and 'administer nodes' permissions.
   */
  public function setUp() {
    parent::setUp('feeds', 'feeds_ui', 'ctools');
    $this->drupalLogin(
      $this->drupalCreateUser(
        array(
          'administer feeds', 'administer nodes',
        )
      )
    );
  }

  /**
   * Run tests.
   *
   * Configures an importer that uses FeedsSitemapParser, imports
   * tests/feeds/sitemap-example.xml through a feed node and compares the five
   * resulting nodes against the known content of that file.
   */
  public function test() {
    $this->createFeedConfiguration('Sitemap', 'sitemap');
    $this->setPlugin('sitemap', 'FeedsSitemapParser');

    // Map each parser source onto a target so that its value can be read
    // back from the imported node.
    $this->addMappings('sitemap',
      array(
        array(
          'source' => 'changefreq',
          'target' => 'title',
          'unique' => FALSE,
        ),
        array(
          'source' => 'priority',
          'target' => 'body',
          'unique' => FALSE,
        ),
        array(
          'source' => 'lastmod',
          'target' => 'created',
          'unique' => FALSE,
        ),
        array(
          'source' => 'url',
          'target' => 'url',
          'unique' => TRUE,
        ),
        array(
          'source' => 'url',
          'target' => 'guid',
          'unique' => TRUE,
        ),
      )
    );

    $path = $GLOBALS['base_url'] .'/'. drupal_get_path('module', 'feeds') .'/tests/feeds/';
    $nid = $this->createFeedNode('sitemap', $path .'sitemap-example.xml', 'Testing Sitemap Parser');
    $this->assertText('Created 5 Story nodes.');

    // Assert DB status.
    $count = db_result(db_query("SELECT COUNT(*) FROM {feeds_node_item}"));
    $this->assertEqual($count, 5, 'Accurate number of items in database.');

    // Check items against known content of feed.
    $result = db_query('SELECT * FROM {feeds_node_item} WHERE feed_nid = %d ORDER BY nid', $nid);

    // Check first item.
    $item = db_fetch_object($result);
    $node = node_load($item->nid);
    $this->assertEqual($node->title, 'monthly', 'Feed item 1 changefreq is correct.');
    $this->assertEqual($node->body, '0.8', 'Feed item 1 priority is correct.');
    $this->assertEqual($node->created, strtotime('2005-01-01'), 'Feed item 1 lastmod is correct.');
    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/', 'Feed item 1 url is correct.');
    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 1 guid is correct.');

    // Check second item.
    $item = db_fetch_object($result);
    $node = node_load($item->nid);
    $this->assertEqual($node->title, 'weekly', 'Feed item 2 changefreq is correct.');
    $this->assertEqual($node->body, '', 'Feed item 2 priority is correct.');
    // The second entry of the fixture has no <lastmod>, so $node->created is
    // whatever time the import ran and cannot be compared to a fixed value.
    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=12&desc=vacation_hawaii', 'Feed item 2 url is correct.');
    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 2 guid is correct.');

    // Check third item.
    $item = db_fetch_object($result);
    $node = node_load($item->nid);
    $this->assertEqual($node->title, 'weekly', 'Feed item 3 changefreq is correct.');
    $this->assertEqual($node->body, '', 'Feed item 3 priority is correct.');
    $this->assertEqual($node->created, strtotime('2004-12-23'), 'Feed item 3 lastmod is correct.');
    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=73&desc=vacation_new_zealand', 'Feed item 3 url is correct.');
    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 3 guid is correct.');

    // Check fourth item.
    $item = db_fetch_object($result);
    $node = node_load($item->nid);
    $this->assertEqual($node->title, '', 'Feed item 4 changefreq is correct.');
    $this->assertEqual($node->body, '0.3', 'Feed item 4 priority is correct.');
    $this->assertEqual($node->created, strtotime('2004-12-23T18:00:15+00:00'), 'Feed item 4 lastmod is correct.');
    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=74&desc=vacation_newfoundland', 'Feed item 4 url is correct.');
    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 4 guid is correct.');

    // Check fifth item.
    $item = db_fetch_object($result);
    $node = node_load($item->nid);
    $this->assertEqual($node->title, '', 'Feed item 5 changefreq is correct.');
    $this->assertEqual($node->body, '', 'Feed item 5 priority is correct.');
    $this->assertEqual($node->created, strtotime('2004-11-23'), 'Feed item 5 lastmod is correct.');
    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=83&desc=vacation_usa', 'Feed item 5 url is correct.');
    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 5 guid is correct.');

    // Check for more items: a sixth fetch must come back empty.
    $item = db_fetch_object($result);
    $this->assertFalse($item, 'Correct number of feed items recorded.');
  }
}
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://www.example.com/</loc>
<lastmod>2005-01-01</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>http://www.example.com/catalog?item=12&amp;desc=vacation_hawaii</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>http://www.example.com/catalog?item=73&amp;desc=vacation_new_zealand</loc>
<lastmod>2004-12-23</lastmod>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>http://www.example.com/catalog?item=74&amp;desc=vacation_newfoundland</loc>
<lastmod>2004-12-23T18:00:15+00:00</lastmod>
<priority>0.3</priority>
</url>
<url>
<loc>http://www.example.com/catalog?item=83&amp;desc=vacation_usa</loc>
<lastmod>2004-11-23</lastmod>
</url>
</urlset>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment