From d12e5a2f85fb76230ccbacc1da2649fc548dd4dc Mon Sep 17 00:00:00 2001 From: Alex Barth <alex_b@53995.no-reply.drupal.org> Date: Sun, 20 Dec 2009 23:27:28 +0000 Subject: [PATCH] #641522 mongolito404, alex_b: Consolidate import stage results. --- CHANGELOG.txt | 1 + feeds.module | 8 +- includes/FeedsBatch.inc | 147 +++++++++++++++++++++++++++++ includes/FeedsImporter.inc | 52 +--------- includes/FeedsSource.inc | 7 +- plugins/FeedsCSVParser.inc | 19 ++-- plugins/FeedsDataProcessor.inc | 4 +- plugins/FeedsFeedNodeProcessor.inc | 4 +- plugins/FeedsFetcher.inc | 11 --- plugins/FeedsFileFetcher.inc | 2 +- plugins/FeedsHTTPFetcher.inc | 14 +-- plugins/FeedsNodeProcessor.inc | 4 +- plugins/FeedsOPMLParser.inc | 14 +-- plugins/FeedsParser.inc | 23 +---- plugins/FeedsProcessor.inc | 8 +- plugins/FeedsSimplePieParser.inc | 23 ++--- plugins/FeedsSyndicationParser.inc | 16 ++-- plugins/FeedsTermProcessor.inc | 4 +- plugins/FeedsUserProcessor.inc | 4 +- 19 files changed, 201 insertions(+), 164 deletions(-) create mode 100644 includes/FeedsBatch.inc diff --git a/CHANGELOG.txt b/CHANGELOG.txt index f57d70af..e182f7e8 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -3,6 +3,7 @@ Feeds 6.x 1.0 XXXXXXX, 20XX-XX-XX --------------------------------- +- #641522 mongolito404, alex_b: Consolidate import stage results. - #662104 Aron Novak: Specify PHP requirement in .info file. - #657374 dtomasch: Common Parser does not get RSS Authors correctly. diff --git a/feeds.module b/feeds.module index f23bf422..06fcda67 100644 --- a/feeds.module +++ b/feeds.module @@ -254,16 +254,16 @@ function feeds_nodeapi(&$node, $op, $form) { try { $source->addConfig($node->feeds); // @todo: Too many indirections. Clean up. 
- $result = $source->importer->fetcher->fetch($source); - $result = $source->importer->parser->parse($result, $source); - if (!isset($result->value['title']) || trim($result->value['title']) == '') { + $feed = $source->importer->fetcher->fetch($source); + $source->importer->parser->parse($feed, $source); + if (!$feed->getTitle()) { form_set_error('title', t('Could not retrieve title from feed.'), 'error'); } else { // Keep the title in a static cache and populate $node->title on // 'presave' as node module looses any changes to $node after // 'validate'. - $last_title = $result->value['title']; + $last_title = $feed->getTitle(); } } catch (Exception $e) {
diff --git a/includes/FeedsBatch.inc b/includes/FeedsBatch.inc
new file mode 100644
index 00000000..ae5595a2
--- /dev/null
+++ b/includes/FeedsBatch.inc
@@ -0,0 +1,170 @@
+<?php
+// $Id$
+
+/**
+ * A FeedsImportBatch is the actual content retrieved from a FeedsSource. On
+ * import, it is created on the fetching stage and passed through the parsing
+ * and processing stage where it is normalized and consumed.
+ *
+ * @see FeedsSource class
+ * @see FeedsFetcher class
+ */
+class FeedsImportBatch {
+
+  // URL of the feed; NULL for a batch created from a local file.
+  protected $url;
+  // Path to the feed on disk; set by the constructor or by getFilePath().
+  protected $file_path;
+  // Raw feed content, loaded on first call to getRaw().
+  protected $raw;
+  // Feed level title, description and link, set through the setters below.
+  protected $title;
+  protected $description;
+  protected $link;
+  // Items of the feed, consumed through shiftItem().
+  protected $items;
+
+  /**
+   * Constructor.
+   *
+   * Either $url or $file_path must be given.
+   *
+   * @param $url
+   *   URL of the feed.
+   * @param $file_path
+   *   Path to a local file holding the feed.
+   */
+  public function __construct($url = NULL, $file_path = NULL) {
+    $this->url = $url;
+    $this->file_path = $file_path;
+    $this->items = array();
+  }
+
+  /**
+   * Load the raw content on first use and keep it for later calls.
+   *
+   * @return
+   *   The raw content of the feed.
+   *
+   * @throws Exception
+   *   If the download answers with a code other than 200.
+   */
+  public function getRaw() {
+    if (empty($this->raw)) {
+      // Prefer file.
+      if ($this->file_path) {
+        $this->raw = file_get_contents(realpath($this->file_path));
+      }
+      elseif ($this->url) {
+        feeds_include_library('http_request.inc', 'http_request');
+        $result = http_request_get($this->url);
+        if ($result->code != 200) {
+          // Report $this->url: there is no local $url in this method.
+          throw new Exception(t('Download of @url failed with code !code.', array('@url' => $this->url, '!code' => $result->code)));
+        }
+        $this->raw = $result->data;
+      }
+    }
+    return $this->raw;
+  }
+
+  /**
+   * @return
+   *   Path to the feed. This path is relative to Drupal's root directory.
+   *   If the feed is not local, getFilePath downloads it to file directory.
+   *
+   * @throws Exception
+   *   If the downloaded content cannot be written to disk.
+   */
+  public function getFilePath() {
+    if (!isset($this->file_path)) {
+      $dest = file_destination(file_directory_path() .'/feeds/'. get_class($this) .'_'. md5($this->url) .'_'. time(), FILE_EXISTS_RENAME);
+      $this->file_path = file_save_data($this->getRaw(), $dest);
+      if ($this->file_path === 0) {
+        throw new Exception(t('Cannot write content to %dest', array('%dest' => $dest)));
+      }
+    }
+    return $this->file_path;
+  }
+
+  /**
+   * @return
+   *   URL to the document. A batch that only knows a file path gets a URL
+   *   built from the site's base URL and that path.
+   */
+  public function getURL() {
+    if (!isset($this->url) && isset($this->file_path)) {
+      return $GLOBALS['base_url'] .'/'. $this->file_path;
+    }
+    return $this->url;
+  }
+
+  /**
+   * @return
+   *   A string that is the feed's title.
+   */
+  public function getTitle() {
+    return $this->title;
+  }
+
+  /**
+   * @return
+   *   A string that is the feed's description.
+   */
+  public function getDescription() {
+    return $this->description;
+  }
+
+  /**
+   * @return
+   *   A string that is the link to the feed's site (not the actual URL of the
+   *   feed). Falls back to URL if not available.
+   */
+  public function getLink() {
+    return isset($this->link) ? $this->link : $this->getURL();
+  }
+
+  /**
+   * @return
+   *   Next available item or NULL if there is none. Every returned item is
+   *   removed from the internal array.
+   */
+  public function shiftItem() {
+    return array_shift($this->items);
+  }
+
+  /**
+   * Set title.
+   */
+  public function setTitle($title) {
+    $this->title = $title;
+  }
+
+  /**
+   * Set description.
+   */
+  public function setDescription($description) {
+    $this->description = $description;
+  }
+
+  /**
+   * Set link.
+   */
+  public function setLink($link) {
+    $this->link = $link;
+  }
+
+  /**
+   * Set items.
+   */
+  public function setItems($items) {
+    $this->items = $items;
+  }
+
+  /**
+   * Add an item.
+   */
+  public function addItem($item) {
+    $this->items[] = $item;
+  }
+}
diff --git a/includes/FeedsImporter.inc b/includes/FeedsImporter.inc index 4ad488f9..a59f951c 100644 --- a/includes/FeedsImporter.inc +++ b/includes/FeedsImporter.inc @@ -9,57 +9,7 @@ // Including FeedsImporter.inc automatically includes dependencies. require_once(dirname(__FILE__) .'/FeedsConfigurable.inc'); require_once(dirname(__FILE__) .'/FeedsSource.inc'); - -/** - * A Feeds result class. - * - * @see class FeedsFetcherResult - * @see class FeedsParserResult - */ -abstract class FeedsResult { - - // An array of valid values for $type. - protected $valid_types = array(); - // The type of this result. - protected $type; - // The value of this result. - protected $value; - - /** - * Constructor: create object, validate class variables. - * - * @param $value - * The value of this result. - * @param $type - * The type of this result. Must be one of $valid_types. - */ - public function __construct($value, $type) { - $this->__set('type', $type); - $this->__set('value', $value); - } - - /** - * Control access to class variables. - */ - public function __set($name, $value) { - if ($name == 'valid_types') { - throw new Exception(t('Cannot write FeedsResult::valid_types.')); - } - if ($name == 'type') { - if (!in_array($value, $this->valid_types)) { - throw new Exception(t('Invalid type "!type"', array('!type' => $value))); - } - } - $this->$name = $value; - } - - /** - * Control access to class variables.
- */ - public function __get($name) { - return $this->$name; - } -} +require_once(dirname(__FILE__) .'/FeedsBatch.inc'); /** * Class defining an importer object. This is the main hub for Feeds module's diff --git a/includes/FeedsSource.inc b/includes/FeedsSource.inc index 5d2ef4ab..e43b9c3f 100644 --- a/includes/FeedsSource.inc +++ b/includes/FeedsSource.inc @@ -107,9 +107,10 @@ class FeedsSource extends FeedsConfigurable { */ public function import() { try { - $result = $this->importer->fetcher->fetch($this); - $result = $this->importer->parser->parse($result, $this); - $this->importer->processor->process($result, $this); + $feed = $this->importer->fetcher->fetch($this); + $this->importer->parser->parse($feed, $this); + $this->importer->processor->process($feed, $this); + unset($feed); } catch (Exception $e) { drupal_set_message($e->getMessage(), 'error'); diff --git a/plugins/FeedsCSVParser.inc b/plugins/FeedsCSVParser.inc index d18b19b6..40e299c6 100644 --- a/plugins/FeedsCSVParser.inc +++ b/plugins/FeedsCSVParser.inc @@ -7,20 +7,13 @@ class FeedsCSVParser extends FeedsParser { /** - * Parses a raw string and returns a Feed object from it. + * Parses a file in CSV format. */ - public function parse(FeedsFetcherResult $fetcherResult, FeedsSource $source) { - feeds_include_library('ParserCSV.inc', 'ParserCSV'); - - if ($fetcherResult->type == 'text/filepath') { - $iterator = new ParserCSVIterator(realpath($fetcherResult->value)); - } - // @todo: write string buffer iterator. - else { - throw new Exception(t('You must use CSV Parser with File Fetcher.')); - } + public function parse(FeedsImportBatch $batch, FeedsSource $source) { // Parse. 
+ feeds_include_library('ParserCSV.inc', 'ParserCSV'); + $iterator = new ParserCSVIterator(realpath($batch->getFilePath())); $source_config = $source->getConfigFor($this); $parser = new ParserCSV(); $parser->setDelimiter($source_config['delimiter']); @@ -44,8 +37,8 @@ class FeedsCSVParser extends FeedsParser { } unset($rows); - // Return result. - return new FeedsParserResult(array('items' => $result_rows)); + // Populate batch. + $batch->setItems($result_rows); } /** diff --git a/plugins/FeedsDataProcessor.inc b/plugins/FeedsDataProcessor.inc index ac7c3d7f..7292be82 100644 --- a/plugins/FeedsDataProcessor.inc +++ b/plugins/FeedsDataProcessor.inc @@ -14,12 +14,12 @@ class FeedsDataProcessor extends FeedsProcessor { /** * Implementation of FeedsProcessor::process(). */ - public function process(FeedsParserResult $parserResult, FeedsSource $source) { + public function process(FeedsImportBatch $batch, FeedsSource $source) { // Count number of created and updated nodes. $inserted = $updated = 0; - foreach ($parserResult->value['items'] as $item) { + while ($item = $batch->shiftItem()) { if (!($id = $this->existingItemId($item, $source)) || $this->config['update_existing']) { // Map item to a data record, feed_nid and timestamp are mandatory. $data = array(); diff --git a/plugins/FeedsFeedNodeProcessor.inc b/plugins/FeedsFeedNodeProcessor.inc index 29c00885..11d5d385 100644 --- a/plugins/FeedsFeedNodeProcessor.inc +++ b/plugins/FeedsFeedNodeProcessor.inc @@ -15,12 +15,12 @@ class FeedsFeedNodeProcessor extends FeedsProcessor { /** * Implementation of FeedsProcessor::process(). */ - public function process(FeedsParserResult $parserResult, FeedsSource $source) { + public function process(FeedsImportBatch $batch, FeedsSource $source) { // Count number of created and updated nodes. 
$created = $updated = 0; - foreach ($parserResult->value['items'] as $item) { + while ($item = $batch->shiftItem()) { // If the target item does not exist OR if update_existing is enabled, // map and save. diff --git a/plugins/FeedsFetcher.inc b/plugins/FeedsFetcher.inc index a8381a16..2bcfe67e 100644 --- a/plugins/FeedsFetcher.inc +++ b/plugins/FeedsFetcher.inc @@ -1,17 +1,6 @@ <?php // $Id$ -/** - * Defines the object a Fetcher returns on fetch(). - */ -class FeedsFetcherResult extends FeedsResult { - // Define valid types. - // @todo: does text/filepath make sense? - // @todo: If convenient, we could expand on this concept and build content - // type negotiation between Fetchers and Parsers. - protected $valid_types = array('text/filepath', 'text/xml'); -} - /** * Abstract class, defines shared functionality between fetchers. * diff --git a/plugins/FeedsFileFetcher.inc b/plugins/FeedsFileFetcher.inc index fda66e26..b8d8558b 100644 --- a/plugins/FeedsFileFetcher.inc +++ b/plugins/FeedsFileFetcher.inc @@ -18,7 +18,7 @@ class FeedsFileFetcher extends FeedsFetcher { $source_config = $source->getConfigFor($this); // Just return path to file, contents can be read easily with // file_get_contents($file_path); - return new FeedsFetcherResult($source_config['source'], 'text/filepath'); + return new FeedsImportBatch(NULL, $source_config['source']); } /** diff --git a/plugins/FeedsHTTPFetcher.inc b/plugins/FeedsHTTPFetcher.inc index 11ae30ea..b357d3aa 100644 --- a/plugins/FeedsHTTPFetcher.inc +++ b/plugins/FeedsHTTPFetcher.inc @@ -22,19 +22,7 @@ class FeedsHTTPFetcher extends FeedsFetcher { */ public function fetch(FeedsSource $source) { $source_config = $source->getConfigFor($this); - $url = $source_config['source']; - - feeds_include_library('http_request.inc', 'http_request'); - if ($this->config['auto_detect_feeds']) { - $result = http_request_get_common_syndication($url); - } - else { - $result = http_request_get($url); - } - if ($result->code != 200) { - throw new 
Exception(t('Download of @url failed with code !code.', array('@url' => $url, '!code' => $result->code))); - } - return new FeedsFetcherResult($result->data, 'text/xml'); + return new FeedsImportBatch($source_config['source']); } /** diff --git a/plugins/FeedsNodeProcessor.inc b/plugins/FeedsNodeProcessor.inc index 37e62b47..efe2cc60 100644 --- a/plugins/FeedsNodeProcessor.inc +++ b/plugins/FeedsNodeProcessor.inc @@ -14,12 +14,12 @@ class FeedsNodeProcessor extends FeedsProcessor { /** * Implementation of FeedsProcessor::process(). */ - public function process(FeedsParserResult $parserResult, FeedsSource $source) { + public function process(FeedsImportBatch $batch, FeedsSource $source) { // Count number of created and updated nodes. $created = $updated = 0; - foreach ($parserResult->value['items'] as $item) { + while ($item = $batch->shiftItem()) { // Create/update if item does not exist or update existing is enabled. if (!($nid = $this->existingItemId($item, $source)) || $this->config['update_existing']) { diff --git a/plugins/FeedsOPMLParser.inc b/plugins/FeedsOPMLParser.inc index b525f125..e581932b 100644 --- a/plugins/FeedsOPMLParser.inc +++ b/plugins/FeedsOPMLParser.inc @@ -12,17 +12,13 @@ class FeedsOPMLParser extends FeedsParser { /** - * Parses a raw string and returns a Feed object from it. + * Parses an OPML formatted string.
*/ - public function parse(FeedsFetcherResult $fetcherResult, FeedsSource $source) { - if ($fetcherResult->type == 'text/filepath') { - $string = file_get_contents($fetcherResult->value); - } - else { - $string = $fetcherResult->value; - } + public function parse(FeedsImportBatch $batch, FeedsSource $source) { feeds_include_library('opml_parser.inc', 'opml_parser'); - return new FeedsParserResult(opml_parser_parse($string), 'syndication'); + $result = opml_parser_parse($batch->getRaw()); + $batch->setTitle($result['title']); + $batch->setItems($result['items']); } /** diff --git a/plugins/FeedsParser.inc b/plugins/FeedsParser.inc index cd4308ff..2a8e1000 100644 --- a/plugins/FeedsParser.inc +++ b/plugins/FeedsParser.inc @@ -1,23 +1,6 @@ <?php // $Id$ -/** - * Defines the object a Parser returns on parser(). - */ -class FeedsParserResult extends FeedsResult { - // Define valid types. - // @todo: does this distinction make sense? We may be able to run with - // 'simple' and no special case for 'syndication'. - protected $valid_types = array('simple', 'syndication'); - - /** - * Override constructor to define a default type. - */ - public function __construct($value, $type = 'simple') { - parent::__construct($value, $type); - } -} - /** * Abstract class, defines interface for parsers. * @@ -30,8 +13,8 @@ abstract class FeedsParser extends FeedsPlugin { * * Stub method. Extending classes must implement this method. * - * @param $fetcherResult - * Content returned by fetcher. + * @param $batch + * FeedsImportBatch returned by fetcher. * @param FeedsSource $source * Source information. * @return @@ -40,7 +23,7 @@ abstract class FeedsParser extends FeedsPlugin { * * @todo: Should it be execute() ? */ - public abstract function parse(FeedsFetcherResult $fetcherResult, FeedsSource $source); + public abstract function parse(FeedsImportBatch $batch, FeedsSource $source); /** * Clear all caches for results for given source. 
diff --git a/plugins/FeedsProcessor.inc b/plugins/FeedsProcessor.inc index 1adf42a4..8b14d0ab 100644 --- a/plugins/FeedsProcessor.inc +++ b/plugins/FeedsProcessor.inc @@ -12,14 +12,14 @@ abstract class FeedsProcessor extends FeedsPlugin { * Process the result of the parser or previous processors. * Extending classes must implement this method. * - * @param FeedsParserResult $parserResult - * The result of the parsing stage. + * @param FeedsImportBatch $batch + * The current feed import data passed in from the parsing stage. * @param FeedsSource $source * Source information about this import. * * @todo: Should it be execute()? */ - public abstract function process(FeedsParserResult $parserResult, FeedsSource $source); + public abstract function process(FeedsImportBatch $batch, FeedsSource $source); /** * Remove all stored results or stored results up to a certain time for this @@ -191,7 +191,7 @@ abstract class FeedsProcessor extends FeedsPlugin { * sources that are unique. * * @param $source_item - * A feed item from a FeedsParserResult. + * A feed item from a FeedsImportBatch. * * @return * An array where the keys are target field names and the values are the diff --git a/plugins/FeedsSimplePieParser.inc b/plugins/FeedsSimplePieParser.inc index 638d18dc..ac03f77b 100644 --- a/plugins/FeedsSimplePieParser.inc +++ b/plugins/FeedsSimplePieParser.inc @@ -9,20 +9,14 @@ class FeedsSimplePieParser extends FeedsParser { /** - * Parses a raw string and returns a Feed object from it. + * Parses a string in RSS/Atom format. */ - public function parse(FeedsFetcherResult $fetcherResult, FeedsSource $source) { - if ($fetcherResult->type == 'text/filepath') { - $string = file_get_contents($fetcherResult->value); - } - else { - $string = $fetcherResult->value; - } + public function parse(FeedsImportBatch $batch, FeedsSource $source) { feeds_include_library('simplepie.inc', 'simplepie'); // Initialize SimplePie. 
$parser = new SimplePie(); - $parser->set_raw_data($string); + $parser->set_raw_data($batch->getRaw()); $parser->set_stupidly_fast(TRUE); $parser->encode_instead_of_strip(FALSE); // @todo: is caching effective when we pass in raw data? @@ -31,12 +25,10 @@ class FeedsSimplePieParser extends FeedsParser { $parser->init(); // Construct the standard form of the parsed feed - $feed = array(); - $feed['title'] = ($title = $parser->get_title()) ? $title : $this->createTitle($parser->get_description()); - $feed['description'] = $parser->get_description(); - $feed['link'] = html_entity_decode($parser->get_link()); + $batch->setTitle(($title = $parser->get_title()) ? $title : $this->createTitle($parser->get_description())); + $batch->setDescription($parser->get_description()); + $batch->setLink(html_entity_decode($parser->get_link())); - $feed['items'] = array(); $items_num = $parser->get_item_quantity(); for ($i = 0; $i < $items_num; $i++) { $item = array(); @@ -96,11 +88,10 @@ class FeedsSimplePieParser extends FeedsParser { $item['tags'] = $tags; // Stick the raw data onto the feed item. $item['raw'] = $simplepie_item->data; - $feed['items'][] = $item; + $batch->addItem($item); } // Release parser. unset($parser); - return new FeedsParserResult($feed, 'syndication'); } /** diff --git a/plugins/FeedsSyndicationParser.inc b/plugins/FeedsSyndicationParser.inc index 11b86195..8dddacf2 100644 --- a/plugins/FeedsSyndicationParser.inc +++ b/plugins/FeedsSyndicationParser.inc @@ -9,17 +9,15 @@ class FeedsSyndicationParser extends FeedsParser { /** - * Parses a raw string and returns a Feed object from it. + * Parses a string in RSS/Atom format. 
*/ - public function parse(FeedsFetcherResult $fetcherResult, FeedsSource $source) { - if ($fetcherResult->type == 'text/filepath') { - $string = file_get_contents($fetcherResult->value); - } - else { - $string = $fetcherResult->value; - } + public function parse(FeedsImportBatch $batch, FeedsSource $source) { feeds_include_library('common_syndication_parser.inc', 'common_syndication_parser'); - return new FeedsParserResult(common_syndication_parser_parse($string), 'syndication'); + $result = common_syndication_parser_parse($batch->getRaw()); + $batch->setTitle($result['title']); + $batch->setDescription($result['description']); + $batch->setLink($result['link']); + $batch->setItems($result['items']); } /** diff --git a/plugins/FeedsTermProcessor.inc b/plugins/FeedsTermProcessor.inc index e31f0a58..4249017c 100644 --- a/plugins/FeedsTermProcessor.inc +++ b/plugins/FeedsTermProcessor.inc @@ -14,7 +14,7 @@ class FeedsTermProcessor extends FeedsProcessor { /** * Implementation of FeedsProcessor::process(). */ - public function process(FeedsParserResult $parserResult, FeedsSource $source) { + public function process(FeedsImportBatch $batch, FeedsSource $source) { if (empty($this->config['vocabulary'])) { throw new Exception(t('You must define a vocabulary for Taxonomy term processor before importing.')); @@ -23,7 +23,7 @@ class FeedsTermProcessor extends FeedsProcessor { // Count number of created and updated nodes. $created = $updated = $no_name = 0; - foreach ($parserResult->value['items'] as $item) { + while ($item = $batch->shiftItem()) { if (!($tid = $this->existingItemId($item, $source)) || $this->config['update_existing']) { diff --git a/plugins/FeedsUserProcessor.inc b/plugins/FeedsUserProcessor.inc index d3b33abd..cd598c2e 100644 --- a/plugins/FeedsUserProcessor.inc +++ b/plugins/FeedsUserProcessor.inc @@ -14,12 +14,12 @@ class FeedsUserProcessor extends FeedsProcessor { /** * Implementation of FeedsProcessor::process(). 
*/ - public function process(FeedsParserResult $parserResult, FeedsSource $source) { + public function process(FeedsImportBatch $batch, FeedsSource $source) { // Count number of created and updated nodes. $created = $updated = $failed = 0; - foreach ($parserResult->value['items'] as $item) { + while ($item = $batch->shiftItem()) { if (!($uid = $this->existingItemId($item, $source)) || $this->config['update_existing']) { -- GitLab