diff --git a/docs/config.html b/docs/config.html index c6fb04e..abb3f19 100644 --- a/docs/config.html +++ b/docs/config.html @@ -101,8 +101,8 @@ use for logging output. Note: this configuration value is processed raw
feed_timeout
Number of seconds to wait for any given feed
-
new_feed_items
-
Number of items to take from new feeds
+
new_feed_items
+
Maximum number of items to include in the output from any one feed. When set to 0, the default, no limit is applied.
spider_threads
The number of threads to use when spidering. When set to 0, the default, no threads are used and spidering follows the traditional algorithm.
diff --git a/planet/config.py b/planet/config.py index afae785..53195c4 100644 --- a/planet/config.py +++ b/planet/config.py @@ -106,6 +106,7 @@ def __init__(): define_planet('output_dir', 'output') define_planet('spider_threads', 0) + define_planet_int('new_feed_items', 0) define_planet_int('feed_timeout', 20) define_planet_int('cache_keep_entries', 10) diff --git a/planet/splice.py b/planet/splice.py index 2c23b09..ed2a856 100644 --- a/planet/splice.py +++ b/planet/splice.py @@ -67,6 +67,8 @@ def splice(): # insert entry information items = 0 + count = {} + new_feed_items = config.new_feed_items() for mtime,file in dir: if index != None: base = os.path.basename(file) @@ -75,15 +77,23 @@ def splice(): try: entry=minidom.parse(file) - # verify that this entry is currently subscribed to + # verify that this entry is currently subscribed to and that the + # number of entries contributed by this feed does not exceed + # config.new_feed_items entry.normalize() sources = entry.getElementsByTagName('source') if sources: ids = sources[0].getElementsByTagName('id') - if ids and ids[0].childNodes[0].nodeValue not in sub_ids: - ids = sources[0].getElementsByTagName('planet:id') - if not ids: continue - if ids[0].childNodes[0].nodeValue not in sub_ids: continue + if ids: + id = ids[0].childNodes[0].nodeValue + count[id] = count.get(id,0) + 1 + if new_feed_items and count[id] > new_feed_items: continue + + if id not in sub_ids: + ids = sources[0].getElementsByTagName('planet:id') + if not ids: continue + id = ids[0].childNodes[0].nodeValue + if id not in sub_ids: continue # add entry to feed feed.appendChild(entry.documentElement) diff --git a/tests/test_splice.py b/tests/test_splice.py index 317faeb..66da44f 100644 --- a/tests/test_splice.py +++ b/tests/test_splice.py @@ -24,3 +24,11 @@ class SpliceTest(unittest.TestCase): self.assertEqual(8,len(doc.getElementsByTagName('entry'))) self.assertEqual(3,len(doc.getElementsByTagName('planet:source'))) 
self.assertEqual(11,len(doc.getElementsByTagName('planet:name'))) + + def test_splice_new_feed_items(self): + config.load(configfile) + config.parser.set('Planet','new_feed_items','3') + doc = splice() + self.assertEqual(9,len(doc.getElementsByTagName('entry'))) + self.assertEqual(4,len(doc.getElementsByTagName('planet:source'))) + self.assertEqual(13,len(doc.getElementsByTagName('planet:name')))