diff --git a/THANKS b/THANKS
index a4da15e..a58da71 100644
--- a/THANKS
+++ b/THANKS
@@ -9,7 +9,7 @@ Harry Fuecks - Pipe characters in file names, filter bug
Eric van der Vlist - Filters to add language, category information
Chris Dolan - mkdir cache; default template_dirs; fix xsltproc
David Sifry - rss 2.0 xslt template based on http://atom.geekhood.net/
-Morten Fredericksen - Support WordPress LinkManager OPML
+Morten Frederiksen - Support WordPress LinkManager OPML
Harry Fuecks - default item date to feed date
Antonio Cavedoni - Django templates
diff --git a/docs/config.html b/docs/config.html
index b1e6550..c6fb04e 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -111,6 +111,10 @@ no threads are used and spidering follows the traditional algorithm.
directory to be used for an additional HTTP cache to front end the Venus
cache. If specified as a relative path, it is evaluated relative to the
cache_directory
.
+
cache_keep_entries
+Used by expunge
to determine how many entries should be
+kept for each source when expunging old entries from the cache directory.
+This may be overridden on a per subscription feed basis.
Additional options can be found in
normalization level overrides .
diff --git a/expunge.py b/expunge.py
new file mode 100644
index 0000000..ff5017a
--- /dev/null
+++ b/expunge.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+"""
+Main program to run just the expunge portion of planet
+"""
+
+import os.path
+import sys
+from planet import expunge, config
+
+if __name__ == '__main__':
+ # Expunge only when given exactly one argument naming an existing file.
+ if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
+ config.load(sys.argv[1])
+ expunge.expungeCache()
+ else:
+ print "Usage:"
+ print " python %s config.ini" % sys.argv[0]
diff --git a/planet.py b/planet.py
index a285f6c..c278c06 100755
--- a/planet.py
+++ b/planet.py
@@ -21,6 +21,7 @@ if __name__ == "__main__":
offline = 0
verbose = 0
only_if_new = 0
+ expunge = 0
for arg in sys.argv[1:]:
if arg == "-h" or arg == "--help":
@@ -31,6 +32,7 @@ if __name__ == "__main__":
print " -o, --offline Update the Planet from the cache only"
print " -h, --help Display this help message and exit"
print " -n, --only-if-new Only spider new feeds"
+ print " -x, --expunge Expunge old entries from cache"
print
sys.exit(0)
elif arg == "-v" or arg == "--verbose":
@@ -39,6 +41,8 @@ if __name__ == "__main__":
offline = 1
elif arg == "-n" or arg == "--only-if-new":
only_if_new = 1
+ elif arg == "-x" or arg == "--expunge":
+ expunge = 1
elif arg.startswith("-"):
print >>sys.stderr, "Unknown option:", arg
sys.exit(1)
@@ -62,3 +66,7 @@ if __name__ == "__main__":
from planet import splice
doc = splice.splice()
splice.apply(doc.toxml('utf-8'))
+
+ if expunge:
+     from planet import expunge  # rebinds the flag name to the module
+     expunge.expungeCache()  # must be called; a bare reference is a no-op
diff --git a/planet/config.py b/planet/config.py
index 669dd68..afae785 100644
--- a/planet/config.py
+++ b/planet/config.py
@@ -107,6 +107,7 @@ def __init__():
define_planet('spider_threads', 0)
define_planet_int('feed_timeout', 20)
+ define_planet_int('cache_keep_entries', 10)
define_planet_list('template_files')
define_planet_list('bill_of_materials')
diff --git a/planet/expunge.py b/planet/expunge.py
new file mode 100644
index 0000000..9f890b9
--- /dev/null
+++ b/planet/expunge.py
@@ -0,0 +1,68 @@
+""" Expunge old entries from a cache of entries """
+import glob, os, planet, config, feedparser
+from xml.dom import minidom
+from spider import filename
+
+def expungeCache():
+    """ Expunge old entries from a cache of entries
+
+    For each subscribed feed, keep its newest cache_keep_entries entries
+    (by file mtime; a per-feed setting wins) and delete the rest, plus entries
+    whose source id matches no subscription.  Entries with no source id stay.
+    """
+    import planet
+    log = planet.getLogger(config.log_level(),config.log_format())
+
+    log.info("Determining feed subscriptions")
+    entry_count = {}
+    sources = config.cache_sources_directory()
+    for sub in config.subscriptions():
+        data=feedparser.parse(filename(sources,sub))
+        if not data.feed.has_key('id'): continue
+        if config.feed_options(sub).has_key('cache_keep_entries'):
+            entry_count[data.feed.id] = int(config.feed_options(sub)['cache_keep_entries'])
+        else:
+            entry_count[data.feed.id] = config.cache_keep_entries()
+
+    log.info("Listing cached entries")
+    cache = config.cache_directory()
+    # newest first, so each feed's allowance is spent on its latest entries
+    entries = [(os.stat(path).st_mtime, path)
+               for path in glob.glob(cache+"/*") if not os.path.isdir(path)]
+    entries.sort()
+    entries.reverse()
+
+    for mtime, path in entries:
+        try:
+            entry = minidom.parse(path)
+            # determine source of entry
+            entry.normalize()
+            source_nodes = entry.getElementsByTagName('source')
+            if not source_nodes:
+                # no source determined, do not delete
+                log.debug("No source found for %s", path)
+                continue
+            ids = source_nodes[0].getElementsByTagName('id')
+            if not ids:
+                # feed id not found, do not delete
+                log.debug("No source feed id found for %s", path)
+                continue
+            feed_id = ids[0].childNodes[0].nodeValue
+            if feed_id in entry_count:
+                # subscribed to feed, update entry count
+                entry_count[feed_id] -= 1
+                if entry_count[feed_id] >= 0:
+                    # maximum not reached, do not delete
+                    log.debug("Maximum not reached for %s from %s", path, feed_id)
+                    continue
+                # maximum reached
+                log.debug("Removing %s, maximum reached for %s", path, feed_id)
+            else:
+                # not subscribed
+                log.debug("Removing %s, not subscribed to %s", path, feed_id)
+            # remove old entry
+            os.unlink(path)
+        except Exception:  # not bare: let KeyboardInterrupt/SystemExit through
+            log.error("Error parsing %s", path)
+
+# end of expungeCache()
diff --git a/tests/data/expunge/config.ini b/tests/data/expunge/config.ini
new file mode 100644
index 0000000..ff750e0
--- /dev/null
+++ b/tests/data/expunge/config.ini
@@ -0,0 +1,20 @@
+[Planet]
+name = test planet
+cache_directory = tests/work/expunge/cache
+cache_keep_entries = 1
+
+[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1]
+name = no source
+
+[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2]
+name = no source id
+
+[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3]
+name = global setting
+
+[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4]
+name = local setting
+cache_keep_entries = 2
+
+#[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5]
+#name = unsubbed
diff --git a/tests/data/expunge/test1.entry b/tests/data/expunge/test1.entry
new file mode 100644
index 0000000..1ef50ac
--- /dev/null
+++ b/tests/data/expunge/test1.entry
@@ -0,0 +1,8 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test1/1
+
+ Test 1/1
+ Entry with missing source
+ 2007-03-01T01:01:00Z
+
\ No newline at end of file
diff --git a/tests/data/expunge/test2.entry b/tests/data/expunge/test2.entry
new file mode 100644
index 0000000..b07ae59
--- /dev/null
+++ b/tests/data/expunge/test2.entry
@@ -0,0 +1,11 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test2/1
+
+ Test 2/1
+ Entry with missing source id
+ 2007-03-01T02:01:00Z
+
+ Test 2/1 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test3a.entry b/tests/data/expunge/test3a.entry
new file mode 100644
index 0000000..af85d83
--- /dev/null
+++ b/tests/data/expunge/test3a.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/1
+
+ Test 3/1
+ Entry for global setting 1
+ 2007-03-01T03:01:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3
+ Test 3 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test3b.entry b/tests/data/expunge/test3b.entry
new file mode 100644
index 0000000..195cadd
--- /dev/null
+++ b/tests/data/expunge/test3b.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/2
+
+ Test 3/2
+ Entry for global setting 2
+ 2007-03-01T03:02:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3
+ Test 3 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test3c.entry b/tests/data/expunge/test3c.entry
new file mode 100644
index 0000000..0f33eb0
--- /dev/null
+++ b/tests/data/expunge/test3c.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/3
+
+ Test 3/3
+ Entry for global setting 3
+ 2007-03-01T03:03:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3
+ Test 3 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test4a.entry b/tests/data/expunge/test4a.entry
new file mode 100644
index 0000000..744a83c
--- /dev/null
+++ b/tests/data/expunge/test4a.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/1
+
+ Test 4/1
+ Entry for local setting 1
+ 2007-03-01T04:01:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4
+ Test 4 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test4b.entry b/tests/data/expunge/test4b.entry
new file mode 100644
index 0000000..4dcb6ba
--- /dev/null
+++ b/tests/data/expunge/test4b.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/2
+
+ Test 4/2
+ Entry for local setting 2
+ 2007-03-01T04:02:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4
+ Test 4 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test4c.entry b/tests/data/expunge/test4c.entry
new file mode 100644
index 0000000..02fb184
--- /dev/null
+++ b/tests/data/expunge/test4c.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/3
+
+ Test 4/3
+ Entry for local setting 3
+ 2007-03-01T04:03:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4
+ Test 4 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/test5.entry b/tests/data/expunge/test5.entry
new file mode 100644
index 0000000..96d338c
--- /dev/null
+++ b/tests/data/expunge/test5.entry
@@ -0,0 +1,12 @@
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-test5/1
+
+ Test 5/1
+ Entry from unsubbed feed
+ 2007-03-01T05:01:00Z
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5
+ Test 5 source
+
+
\ No newline at end of file
diff --git a/tests/data/expunge/testfeed1.atom b/tests/data/expunge/testfeed1.atom
new file mode 100644
index 0000000..455803e
--- /dev/null
+++ b/tests/data/expunge/testfeed1.atom
@@ -0,0 +1,5 @@
+
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1
+
\ No newline at end of file
diff --git a/tests/data/expunge/testfeed2.atom b/tests/data/expunge/testfeed2.atom
new file mode 100644
index 0000000..58ae023
--- /dev/null
+++ b/tests/data/expunge/testfeed2.atom
@@ -0,0 +1,5 @@
+
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2
+
\ No newline at end of file
diff --git a/tests/data/expunge/testfeed3.atom b/tests/data/expunge/testfeed3.atom
new file mode 100644
index 0000000..a8c111e
--- /dev/null
+++ b/tests/data/expunge/testfeed3.atom
@@ -0,0 +1,5 @@
+
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3
+
\ No newline at end of file
diff --git a/tests/data/expunge/testfeed4.atom b/tests/data/expunge/testfeed4.atom
new file mode 100644
index 0000000..10fb50d
--- /dev/null
+++ b/tests/data/expunge/testfeed4.atom
@@ -0,0 +1,5 @@
+
+
+
+ tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4
+
\ No newline at end of file
diff --git a/tests/test_expunge.py b/tests/test_expunge.py
new file mode 100644
index 0000000..fc93b52
--- /dev/null
+++ b/tests/test_expunge.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+import unittest, os, glob, shutil, time
+from planet.spider import filename
+from planet import feedparser, config
+from planet.expunge import expungeCache
+from xml.dom import minidom
+import planet
+
+workdir = 'tests/work/expunge/cache'
+sourcesdir = 'tests/work/expunge/cache/sources'
+testentries = 'tests/data/expunge/test*.entry'
+testfeeds = 'tests/data/expunge/test*.atom'
+configfile = 'tests/data/expunge/config.ini'
+
+class ExpungeTest(unittest.TestCase):
+    """ Run expungeCache over a scratch cache built from tests/data/expunge """
+    def setUp(self):
+        # silence errors
+        planet.logger = None
+        planet.getLogger('CRITICAL',None)
+
+        try:
+            os.makedirs(workdir)
+            os.makedirs(sourcesdir)
+        except OSError:
+            # leftovers from an aborted run: wipe them and start clean
+            self.tearDown()
+            os.makedirs(workdir)
+            os.makedirs(sourcesdir)
+
+    def tearDown(self):
+        shutil.rmtree(workdir)
+        os.removedirs(os.path.split(workdir)[0])
+
+    def test_expunge(self):
+        config.load(configfile)
+
+        # create test entries in cache with correct timestamp
+        for entry in glob.glob(testentries):
+            e=minidom.parse(entry)
+            e.normalize()
+            eid = e.getElementsByTagName('id')
+            eupdated = e.getElementsByTagName('updated')
+            # skip incomplete entries before dereferencing either node list
+            if not eid or not eupdated: continue
+            efile = filename(workdir, eid[0].childNodes[0].nodeValue)
+            emtime = time.mktime(feedparser._parse_date_w3dtf(
+                eupdated[0].childNodes[0].nodeValue))
+            shutil.copyfile(entry, efile)
+            os.utime(efile, (emtime, emtime))
+
+        # create test feeds in cache
+        sources = config.cache_sources_directory()
+        for feed in glob.glob(testfeeds):
+            f=minidom.parse(feed)
+            f.normalize()
+            fid = f.getElementsByTagName('id')
+            if not fid: continue
+            ffile = filename(sources, fid[0].childNodes[0].nodeValue)
+            shutil.copyfile(feed, ffile)
+
+        # verify that exactly nine entries + one source dir were produced
+        files = glob.glob(workdir+"/*")
+        self.assertEqual(10, len(files))
+
+        # verify that exactly four feeds were produced in source dir
+        files = glob.glob(sources+"/*")
+        self.assertEqual(4, len(files))
+
+        # expunge...
+        expungeCache()
+
+        # verify that five entries and one source dir are left
+        files = glob.glob(workdir+"/*")
+        self.assertEqual(6, len(files))
+
+        # verify that the right five entries are left
+        for kept in ('bzr.mfd-consult.dk,2007,venus-expunge-test1,1',
+                     'bzr.mfd-consult.dk,2007,venus-expunge-test2,1',
+                     'bzr.mfd-consult.dk,2007,venus-expunge-test3,3',
+                     'bzr.mfd-consult.dk,2007,venus-expunge-test4,2',
+                     'bzr.mfd-consult.dk,2007,venus-expunge-test4,3'):
+            self.assertTrue(os.path.join(workdir, kept) in files)