diff --git a/docs/config.html b/docs/config.html
index c6fb04e..abb3f19 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -101,8 +101,8 @@ use for logging output. Note: this configuration value is processed
raw
feed_timeout
Number of seconds to wait for any given feed
-new_feed_items
-Number of items to take from new feeds
+new_feed_items
+Maximum number of items to include in the output from any one feed (0, the default, means no limit)
spider_threads
The number of threads to use when spidering. When set to 0, the default,
no threads are used and spidering follows the traditional algorithm.
diff --git a/planet/config.py b/planet/config.py
index afae785..53195c4 100644
--- a/planet/config.py
+++ b/planet/config.py
@@ -106,6 +106,7 @@ def __init__():
define_planet('output_dir', 'output')
define_planet('spider_threads', 0)
+ define_planet_int('new_feed_items', 0)
define_planet_int('feed_timeout', 20)
define_planet_int('cache_keep_entries', 10)
diff --git a/planet/splice.py b/planet/splice.py
index 2c23b09..ed2a856 100644
--- a/planet/splice.py
+++ b/planet/splice.py
@@ -67,6 +67,8 @@ def splice():
# insert entry information
items = 0
+ count = {}
+ new_feed_items = config.new_feed_items()
for mtime,file in dir:
if index != None:
base = os.path.basename(file)
@@ -75,15 +77,23 @@ def splice():
try:
entry=minidom.parse(file)
- # verify that this entry is currently subscribed to
+ # verify that this entry is currently subscribed to and that the
+ # number of entries contributed by this feed does not exceed
+ # config.new_feed_items (a value of 0 disables the limit)
entry.normalize()
sources = entry.getElementsByTagName('source')
if sources:
ids = sources[0].getElementsByTagName('id')
- if ids and ids[0].childNodes[0].nodeValue not in sub_ids:
- ids = sources[0].getElementsByTagName('planet:id')
- if not ids: continue
- if ids[0].childNodes[0].nodeValue not in sub_ids: continue
+ if ids:
+ id = ids[0].childNodes[0].nodeValue
+ count[id] = count.get(id,0) + 1
+ if new_feed_items and count[id] > new_feed_items: continue
+
+ if id not in sub_ids:
+ ids = sources[0].getElementsByTagName('planet:id')
+ if not ids: continue
+ id = ids[0].childNodes[0].nodeValue
+ if id not in sub_ids: continue
# add entry to feed
feed.appendChild(entry.documentElement)
diff --git a/tests/test_splice.py b/tests/test_splice.py
index 317faeb..66da44f 100644
--- a/tests/test_splice.py
+++ b/tests/test_splice.py
@@ -24,3 +24,11 @@ class SpliceTest(unittest.TestCase):
self.assertEqual(8,len(doc.getElementsByTagName('entry')))
self.assertEqual(3,len(doc.getElementsByTagName('planet:source')))
self.assertEqual(11,len(doc.getElementsByTagName('planet:name')))
+
+ def test_splice_new_feed_items(self):
+ config.load(configfile)
+ config.parser.set('Planet','new_feed_items','3')
+ doc = splice()
+ self.assertEqual(9,len(doc.getElementsByTagName('entry')))
+ self.assertEqual(4,len(doc.getElementsByTagName('planet:source')))
+ self.assertEqual(13,len(doc.getElementsByTagName('planet:name')))