diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 7fc6f6c8203be9d4e4ab210e497f978e61f6eedf..b90a628be7f3cd4dfdc4912f97f7296fdb3a720c 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -3,6 +3,7 @@
 Feeds 6.x 1.0 xxxxxxxxxxxxxxxxxxxx
 ----------------------------------
 
+- #759904 lyricnz: Provide a Google Sitemap Parser.
 - #774858 rjbrown99: Fix Node Processor updates node "created" time when
   updating.
 - #704236 jerdavis: Support mapping to CCK float field.
diff --git a/feeds.plugins.inc b/feeds.plugins.inc
index e1e7e4975608510b007e5f5e09d79e64e3f6ae59..6aa87d01a5d73c203c82ab4a6501438d6809be67 100644
--- a/feeds.plugins.inc
+++ b/feeds.plugins.inc
@@ -120,6 +120,16 @@ function _feeds_feeds_plugins() {
       ),
     );
   }
+  $info['FeedsSitemapParser'] = array(
+    'name' => 'Sitemap parser',
+    'description' => 'Parse Sitemap XML format feeds.',
+    'handler' => array(
+      'parent' => 'FeedsParser',
+      'class' => 'FeedsSitemapParser',
+      'file' => 'FeedsSitemapParser.inc',
+      'path' => $path,
+    ),
+  );
   $info['FeedsNodeProcessor'] = array(
     'name' => 'Node processor',
     'description' => 'Create nodes.',
diff --git a/plugins/FeedsSitemapParser.inc b/plugins/FeedsSitemapParser.inc
new file mode 100644
index 0000000000000000000000000000000000000000..b0180a57837c6783d91eaa0f1f567941333a59e9
--- /dev/null
+++ b/plugins/FeedsSitemapParser.inc
@@ -0,0 +1,67 @@
+<?php
+// $Id$
+
+/**
+ * A parser for the Sitemap specification http://www.sitemaps.org/protocol.php
+ *
+ * Each <url> child of the document root becomes one feed item.
+ */
+class FeedsSitemapParser extends FeedsParser {
+  /**
+   * Implementation of FeedsParser::parse().
+   *
+   * Every item carries a 'url' key; 'lastmod' (the result of strtotime()),
+   * 'changefreq' and 'priority' are only set when the element is present.
+   *
+   * @param $batch
+   *   Import batch whose raw content is parsed and to which items are added.
+   * @param $source
+   *   The source being imported. Not used by this parser.
+   */
+  public function parse(FeedsImportBatch $batch, FeedsSource $source) {
+    // Yes, using a DOM parser is a bit inefficient, but will do for now
+    $xml = new SimpleXMLElement($batch->getRaw());
+    foreach ($xml->url as $url) {
+      // Cast every value to a string: SimpleXMLElement objects cannot be
+      // serialized, so items should only hold plain scalars.
+      $item = array('url' => (string) $url->loc);
+      if ($url->lastmod) {
+        $item['lastmod'] = strtotime((string) $url->lastmod);
+      }
+      if ($url->changefreq) {
+        $item['changefreq'] = (string) $url->changefreq;
+      }
+      if ($url->priority) {
+        $item['priority'] = (string) $url->priority;
+      }
+      $batch->addItem($item);
+    }
+  }
+
+  /**
+   * Implementation of FeedsParser::getMappingSources().
+   *
+   * @return
+   *   Mapping source definitions keyed by the item keys that parse() sets.
+   */
+  public function getMappingSources() {
+    return array(
+      'url' => array(
+        'name' => t('Item URL (link)'),
+        'description' => t('URL of the feed item.'),
+      ),
+      'lastmod' => array(
+        'name' => t('Last modification date'),
+        'description' => t('Last modified date as UNIX time GMT of the feed item.'),
+      ),
+      'changefreq' => array(
+        'name' => t('Change frequency'),
+        'description' => t('How frequently the page is likely to change.'),
+      ),
+      'priority' => array(
+        'name' => t('Priority'),
+        'description' => t('The priority of this URL relative to other URLs on the site.'),
+      ),
+    );
+  }
+}
diff --git a/tests/feeds.test b/tests/feeds.test
index 269bb58696b72c8721ad6f38ebafe62568743202..68a77a8e25afb6ed041b5151812483d301e074b7 100644
--- a/tests/feeds.test
+++ b/tests/feeds.test
@@ -841,3 +841,140 @@ class FeedsSyndicationParserTestCase extends FeedsWebTestCase {
     );
   }
 }
+
+/**
+ * Test Sitemap parser.
+ *
+ * Imports tests/feeds/sitemap-example.xml and compares the created nodes with
+ * the known content of that file.
+ */
+class FeedsSitemapParserTestCase extends FeedsWebTestCase {
+
+  /**
+   * Describe this test.
+   */
+  public function getInfo() {
+    return array(
+      'name' => t('Sitemap parser'),
+      'description' => t('Regression tests for Sitemap XML format parser.'),
+      'group' => t('Feeds'),
+    );
+  }
+
+  /**
+   * Set up test.
+   *
+   * Hands the module list to the parent and logs in a user holding the
+   * 'administer feeds' and 'administer nodes' permissions.
+   */
+  public function setUp() {
+    parent::setUp('feeds', 'feeds_ui', 'ctools');
+
+    $this->drupalLogin(
+      $this->drupalCreateUser(
+        array(
+          'administer feeds', 'administer nodes',
+        )
+      )
+    );
+  }
+
+  /**
+   * Run tests.
+   */
+  public function test() {
+    $this->createFeedConfiguration('Sitemap', 'sitemap');
+    $this->setPlugin('sitemap', 'FeedsSitemapParser');
+
+    // Map each sitemap source onto a node field so it can be asserted below.
+    $this->addMappings('sitemap',
+      array(
+        array(
+          'source' => 'changefreq',
+          'target' => 'title',
+          'unique' => FALSE,
+        ),
+        array(
+          'source' => 'priority',
+          'target' => 'body',
+          'unique' => FALSE,
+        ),
+        array(
+          'source' => 'lastmod',
+          'target' => 'created',
+          'unique' => FALSE,
+        ),
+        array(
+          'source' => 'url',
+          'target' => 'url',
+          'unique' => TRUE,
+        ),
+        array(
+          'source' => 'url',
+          'target' => 'guid',
+          'unique' => TRUE,
+        ),
+      )
+    );
+
+    $path = $GLOBALS['base_url'] .'/'. drupal_get_path('module', 'feeds') .'/tests/feeds/';
+    $nid = $this->createFeedNode('sitemap', $path .'sitemap-example.xml', 'Testing Sitemap Parser');
+    $this->assertText('Created 5 Story nodes.');
+
+    // Assert DB status.
+    $count = db_result(db_query("SELECT COUNT(*) FROM {feeds_node_item}"));
+    $this->assertEqual($count, 5, 'Accurate number of items in database.');
+
+    // Check items against known content of feed.
+    $result = db_query('SELECT * FROM {feeds_node_item} WHERE feed_nid = %d ORDER BY nid', $nid);
+
+    // Check first item.
+    $item = db_fetch_object($result);
+    $node = node_load($item->nid);
+    $this->assertEqual($node->title, 'monthly', 'Feed item 1 changefreq is correct.');
+    $this->assertEqual($node->body, '0.8', 'Feed item 1 priority is correct.');
+    $this->assertEqual($node->created, strtotime('2005-01-01'), 'Feed item 1 lastmod is correct.');
+    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/', 'Feed item 1 url is correct.');
+    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 1 guid is correct.');
+
+    // Check second item.
+    $item = db_fetch_object($result);
+    $node = node_load($item->nid);
+    $this->assertEqual($node->title, 'weekly', 'Feed item 2 changefreq is correct.');
+    $this->assertEqual($node->body, '', 'Feed item 2 priority is correct.');
+    // The fixture has no <lastmod> for this item, so $node->created is not asserted.
+    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=12&desc=vacation_hawaii', 'Feed item 2 url is correct.');
+    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 2 guid is correct.');
+
+    // Check third item.
+    $item = db_fetch_object($result);
+    $node = node_load($item->nid);
+    $this->assertEqual($node->title, 'weekly', 'Feed item 3 changefreq is correct.');
+    $this->assertEqual($node->body, '', 'Feed item 3 priority is correct.');
+    $this->assertEqual($node->created, strtotime('2004-12-23'), 'Feed item 3 lastmod is correct.');
+    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=73&desc=vacation_new_zealand', 'Feed item 3 url is correct.');
+    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 3 guid is correct.');
+
+    // Check fourth item.
+    $item = db_fetch_object($result);
+    $node = node_load($item->nid);
+    $this->assertEqual($node->title, '', 'Feed item 4 changefreq is correct.');
+    $this->assertEqual($node->body, '0.3', 'Feed item 4 priority is correct.');
+    $this->assertEqual($node->created, strtotime('2004-12-23T18:00:15+00:00'), 'Feed item 4 lastmod is correct.');
+    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=74&desc=vacation_newfoundland', 'Feed item 4 url is correct.');
+    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 4 guid is correct.');
+
+    // Check fifth item.
+    $item = db_fetch_object($result);
+    $node = node_load($item->nid);
+    $this->assertEqual($node->title, '', 'Feed item 5 changefreq is correct.');
+    $this->assertEqual($node->body, '', 'Feed item 5 priority is correct.');
+    $this->assertEqual($node->created, strtotime('2004-11-23'), 'Feed item 5 lastmod is correct.');
+    $this->assertEqual($node->feeds_node_item->url, 'http://www.example.com/catalog?item=83&desc=vacation_usa', 'Feed item 5 url is correct.');
+    $this->assertEqual($node->feeds_node_item->url, $node->feeds_node_item->guid, 'Feed item 5 guid is correct.');
+
+    // Check for more items.
+    $item = db_fetch_object($result);
+    $this->assertFalse($item, 'Correct number of feed items recorded.');
+  }
+}
diff --git a/tests/feeds/sitemap-example.xml b/tests/feeds/sitemap-example.xml
new file mode 100644
index 0000000000000000000000000000000000000000..7695060cc4e47c3a31a6e8f472ea150ec2959197
--- /dev/null
+++ b/tests/feeds/sitemap-example.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url>
+    <loc>http://www.example.com/</loc>
+    <lastmod>2005-01-01</lastmod>
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+  <url>
+    <loc>http://www.example.com/catalog?item=12&amp;desc=vacation_hawaii</loc>
+    <changefreq>weekly</changefreq>
+  </url>
+  <url>
+    <loc>http://www.example.com/catalog?item=73&amp;desc=vacation_new_zealand</loc>
+    <lastmod>2004-12-23</lastmod>
+    <changefreq>weekly</changefreq>
+  </url>
+  <url>
+    <loc>http://www.example.com/catalog?item=74&amp;desc=vacation_newfoundland</loc>
+    <lastmod>2004-12-23T18:00:15+00:00</lastmod>
+    <priority>0.3</priority>
+  </url>
+  <url>
+    <loc>http://www.example.com/catalog?item=83&amp;desc=vacation_usa</loc>
+    <lastmod>2004-11-23</lastmod>
+  </url>
+</urlset>