Added expunge and test cases
This commit is contained in:
commit
5f21b167ff
2
THANKS
2
THANKS
@ -9,7 +9,7 @@ Harry Fuecks - Pipe characters in file names, filter bug
|
||||
Eric van der Vlist - Filters to add language, category information
|
||||
Chris Dolan - mkdir cache; default template_dirs; fix xsltproc
|
||||
David Sifry - rss 2.0 xslt template based on http://atom.geekhood.net/
|
||||
Morten Fredericksen - Support WordPress LinkManager OPML
|
||||
Morten Frederiksen - Support WordPress LinkManager OPML
|
||||
Harry Fuecks - default item date to feed date
|
||||
Antonio Cavedoni - Django templates
|
||||
|
||||
|
@ -111,6 +111,10 @@ no threads are used and spidering follows the traditional algorithm.</dd>
|
||||
directory to be used for an additional HTTP cache to front end the Venus
|
||||
cache. If specified as a relative path, it is evaluated relative to the
|
||||
<code>cache_directory</code>.</dd>
|
||||
<dt><ins>cache_keep_entries</ins></dt>
|
||||
<dd>Used by <code>expunge</code> to determine how many entries should be
|
||||
kept for each source when expunging old entries from the cache directory.
|
||||
This may be overridden on a per subscription feed basis.</dd>
|
||||
</dl>
|
||||
<p>Additional options can be found in
|
||||
<a href="normalization.html#overrides">normalization level overrides</a>.</p>
|
||||
|
17
expunge.py
Normal file
17
expunge.py
Normal file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Main program to run just the expunge portion of planet
|
||||
"""
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
from planet import expunge, config
|
||||
|
||||
if __name__ == '__main__':
    # Stand-alone entry point: expects exactly one argument, the path of an
    # existing configuration file, and runs only the expunge step.
    if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
        config.load(sys.argv[1])
        expunge.expungeCache()
    else:
        # A wrong argument count or a missing config file is a usage error:
        # report it on stderr and exit non-zero so that calling scripts can
        # detect the failure instead of seeing a "successful" no-op run.
        sys.stderr.write("Usage:\n")
        sys.stderr.write(" python %s config.ini\n" % sys.argv[0])
        sys.exit(1)
|
@ -21,6 +21,7 @@ if __name__ == "__main__":
|
||||
offline = 0
|
||||
verbose = 0
|
||||
only_if_new = 0
|
||||
expunge = 0
|
||||
|
||||
for arg in sys.argv[1:]:
|
||||
if arg == "-h" or arg == "--help":
|
||||
@ -31,6 +32,7 @@ if __name__ == "__main__":
|
||||
print " -o, --offline Update the Planet from the cache only"
|
||||
print " -h, --help Display this help message and exit"
|
||||
print " -n, --only-if-new Only spider new feeds"
|
||||
print " -x, --expunge Expunge old entries from cache"
|
||||
print
|
||||
sys.exit(0)
|
||||
elif arg == "-v" or arg == "--verbose":
|
||||
@ -39,6 +41,8 @@ if __name__ == "__main__":
|
||||
offline = 1
|
||||
elif arg == "-n" or arg == "--only-if-new":
|
||||
only_if_new = 1
|
||||
elif arg == "-x" or arg == "--expunge":
|
||||
expunge = 1
|
||||
elif arg.startswith("-"):
|
||||
print >>sys.stderr, "Unknown option:", arg
|
||||
sys.exit(1)
|
||||
@ -62,3 +66,7 @@ if __name__ == "__main__":
|
||||
from planet import splice
|
||||
doc = splice.splice()
|
||||
splice.apply(doc.toxml('utf-8'))
|
||||
|
||||
if expunge:
    # NOTE: this import rebinds the name `expunge` from the command line
    # flag to the planet.expunge module; the flag is not needed afterwards.
    from planet import expunge
    # The parentheses matter: a bare `expunge.expungeCache` only looks the
    # function up and discards it, so nothing would ever be expunged.
    expunge.expungeCache()
|
||||
|
@ -107,6 +107,7 @@ def __init__():
|
||||
define_planet('spider_threads', 0)
|
||||
|
||||
define_planet_int('feed_timeout', 20)
|
||||
define_planet_int('cache_keep_entries', 10)
|
||||
|
||||
define_planet_list('template_files')
|
||||
define_planet_list('bill_of_materials')
|
||||
|
68
planet/expunge.py
Normal file
68
planet/expunge.py
Normal file
@ -0,0 +1,68 @@
|
||||
""" Expunge old entries from a cache of entries """
|
||||
import glob, os, planet, config, feedparser
|
||||
from xml.dom import minidom
|
||||
from spider import filename
|
||||
|
||||
def expungeCache():
    """ Expunge old entries from a cache of entries

    For every subscription whose cached source document has an id, the
    number of entries to keep is taken from the subscription's own
    ``cache_keep_entries`` option when present, otherwise from the global
    ``cache_keep_entries`` setting.  The files directly inside the cache
    directory (sub-directories are ignored) are then visited newest first,
    by modification time, and for each one:

    * files that cannot be parsed as XML are reported and left alone;
    * entries without a ``source`` element, or whose source has no usable
      ``id``, are left alone;
    * entries whose source id belongs to a known subscription are kept
      until that subscription's quota is used up, and removed after that;
    * entries whose source id is not known are removed.  This includes
      entries of subscriptions that were skipped above because their
      cached source document has no id.
    """
    import planet
    log = planet.getLogger(config.log_level(),config.log_format())

    log.info("Determining feed subscriptions")
    # feed id -> number of entries that may still be kept for that feed
    entry_count = {}
    sources_dir = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources_dir, sub))
        if not data.feed.has_key('id'): continue
        options = config.feed_options(sub)
        if options.has_key('cache_keep_entries'):
            # per subscription override
            entry_count[data.feed.id] = int(options['cache_keep_entries'])
        else:
            entry_count[data.feed.id] = config.cache_keep_entries()

    log.info("Listing cached entries")
    cache = config.cache_directory()
    cached = [(os.stat(path).st_mtime, path)
        for path in glob.glob(cache+"/*") if not os.path.isdir(path)]
    # newest first, so that the quota is spent on the most recent entries
    cached.sort()
    cached.reverse()

    for mtime, path in cached:

        try:
            entry = minidom.parse(path)
        except Exception:
            # unreadable or malformed file: report it, never delete it
            log.error("Error parsing %s", path)
            continue

        # determine source of entry
        entry.normalize()
        source_nodes = entry.getElementsByTagName('source')
        if not source_nodes:
            # no source determined, do not delete
            log.debug("No source found for %s", path)
            continue
        ids = source_nodes[0].getElementsByTagName('id')
        if not ids or not ids[0].childNodes:
            # feed id missing or empty, do not delete
            log.debug("No source feed id found for %s", path)
            continue
        feed_id = ids[0].childNodes[0].nodeValue

        if feed_id in entry_count:
            # subscribed to feed, update entry count
            entry_count[feed_id] = entry_count[feed_id] - 1
            if entry_count[feed_id] >= 0:
                # maximum not reached, do not delete
                log.debug("Maximum not reached for %s from %s",
                    path, feed_id)
                continue
            # maximum reached
            log.debug("Removing %s, maximum reached for %s",
                path, feed_id)
        else:
            # not subscribed
            log.debug("Removing %s, not subscribed to %s",
                path, feed_id)

        # remove old entry; a failure here is a file system problem, not
        # a parse problem, so it is reported as such
        try:
            os.unlink(path)
        except OSError:
            log.error("Error removing %s", path)
|
||||
|
||||
# end of expungeCache()
|
20
tests/data/expunge/config.ini
Normal file
20
tests/data/expunge/config.ini
Normal file
@ -0,0 +1,20 @@
|
||||
[Planet]
|
||||
name = test planet
|
||||
cache_directory = tests/work/expunge/cache
|
||||
cache_keep_entries = 1
|
||||
|
||||
[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1]
|
||||
name = no source
|
||||
|
||||
[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2]
|
||||
name = no source id
|
||||
|
||||
[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3]
|
||||
name = global setting
|
||||
|
||||
[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4]
|
||||
name = local setting
|
||||
cache_keep_entries = 2
|
||||
|
||||
#[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5]
|
||||
#name = unsubbed
|
8
tests/data/expunge/test1.entry
Normal file
8
tests/data/expunge/test1.entry
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test1/1</id>
|
||||
<link href="http://example.com/1/1"/>
|
||||
<title>Test 1/1</title>
|
||||
<content>Entry with missing source</content>
|
||||
<updated>2007-03-01T01:01:00Z</updated>
|
||||
</entry>
|
11
tests/data/expunge/test2.entry
Normal file
11
tests/data/expunge/test2.entry
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test2/1</id>
|
||||
<link href="http://example.com/2/1"/>
|
||||
<title>Test 2/1</title>
|
||||
<content>Entry with missing source id</content>
|
||||
<updated>2007-03-01T02:01:00Z</updated>
|
||||
<source>
|
||||
<title>Test 2/1 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test3a.entry
Normal file
12
tests/data/expunge/test3a.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/1</id>
|
||||
<link href="http://example.com/3/1"/>
|
||||
<title>Test 3/1</title>
|
||||
<content>Entry for global setting 1</content>
|
||||
<updated>2007-03-01T03:01:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||
<title>Test 3 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test3b.entry
Normal file
12
tests/data/expunge/test3b.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/2</id>
|
||||
<link href="http://example.com/3/2"/>
|
||||
<title>Test 3/2</title>
|
||||
<content>Entry for global setting 2</content>
|
||||
<updated>2007-03-01T03:02:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||
<title>Test 3 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test3c.entry
Normal file
12
tests/data/expunge/test3c.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/3</id>
|
||||
<link href="http://example.com/3/3"/>
|
||||
<title>Test 3/3</title>
|
||||
<content>Entry for global setting 3</content>
|
||||
<updated>2007-03-01T03:03:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||
<title>Test 3 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test4a.entry
Normal file
12
tests/data/expunge/test4a.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/1</id>
|
||||
<link href="http://example.com/4/1"/>
|
||||
<title>Test 4/1</title>
|
||||
<content>Entry for local setting 1</content>
|
||||
<updated>2007-03-01T04:01:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||
<title>Test 4 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test4b.entry
Normal file
12
tests/data/expunge/test4b.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/2</id>
|
||||
<link href="http://example.com/4/2"/>
|
||||
<title>Test 4/2</title>
|
||||
<content>Entry for local setting 2</content>
|
||||
<updated>2007-03-01T04:02:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||
<title>Test 4 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test4c.entry
Normal file
12
tests/data/expunge/test4c.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/3</id>
|
||||
<link href="http://example.com/4/3"/>
|
||||
<title>Test 4/3</title>
|
||||
<content>Entry for local setting 3</content>
|
||||
<updated>2007-03-01T04:03:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||
<title>Test 4 source</title>
|
||||
</source>
|
||||
</entry>
|
12
tests/data/expunge/test5.entry
Normal file
12
tests/data/expunge/test5.entry
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test5/1</id>
|
||||
<link href="http://example.com/5/1"/>
|
||||
<title>Test 5/1</title>
|
||||
<content>Entry from unsubbed feed</content>
|
||||
<updated>2007-03-01T05:01:00Z</updated>
|
||||
<source>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5</id>
|
||||
<title>Test 5 source</title>
|
||||
</source>
|
||||
</entry>
|
5
tests/data/expunge/testfeed1.atom
Normal file
5
tests/data/expunge/testfeed1.atom
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed1.atom"/>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1</id>
|
||||
</feed>
|
5
tests/data/expunge/testfeed2.atom
Normal file
5
tests/data/expunge/testfeed2.atom
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed2.atom"/>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2</id>
|
||||
</feed>
|
5
tests/data/expunge/testfeed3.atom
Normal file
5
tests/data/expunge/testfeed3.atom
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed3.atom"/>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||
</feed>
|
5
tests/data/expunge/testfeed4.atom
Normal file
5
tests/data/expunge/testfeed4.atom
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed4.atom"/>
|
||||
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||
</feed>
|
83
tests/test_expunge.py
Normal file
83
tests/test_expunge.py
Normal file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python
|
||||
import unittest, os, glob, shutil, time
|
||||
from planet.spider import filename
|
||||
from planet import feedparser, config
|
||||
from planet.expunge import expungeCache
|
||||
from xml.dom import minidom
|
||||
import planet
|
||||
|
||||
workdir = 'tests/work/expunge/cache'
|
||||
sourcesdir = 'tests/work/expunge/cache/sources'
|
||||
testentries = 'tests/data/expunge/test*.entry'
|
||||
testfeeds = 'tests/data/expunge/test*.atom'
|
||||
configfile = 'tests/data/expunge/config.ini'
|
||||
|
||||
class ExpungeTest(unittest.TestCase):
    """ Run expungeCache against a scratch cache built from the fixtures """

    def setUp(self):
        """ Silence logging and create empty cache and sources directories """
        # silence errors
        planet.logger = None
        planet.getLogger('CRITICAL',None)

        try:
            os.makedirs(workdir)
            os.makedirs(sourcesdir)
        except OSError:
            # directories could not be created (e.g. left over from an
            # earlier run): clean up and try once more
            self.tearDown()
            os.makedirs(workdir)
            os.makedirs(sourcesdir)

    def tearDown(self):
        """ Remove the scratch cache and its now empty parent directories """
        shutil.rmtree(workdir)
        os.removedirs(os.path.split(workdir)[0])

    def test_expunge(self):
        """ Populate the cache, expunge, and check which entries survive """
        config.load(configfile)

        # create test entries in cache with correct timestamp
        for entry in glob.glob(testentries):
            e=minidom.parse(entry)
            e.normalize()
            eid = e.getElementsByTagName('id')
            eupdated = e.getElementsByTagName('updated')
            # skip incomplete fixtures *before* indexing into the node lists
            if not eid or not eupdated: continue
            efile = filename(workdir, eid[0].childNodes[0].nodeValue)
            emtime = time.mktime(feedparser._parse_date_w3dtf(
                eupdated[0].childNodes[0].nodeValue))
            shutil.copyfile(entry, efile)
            # expungeCache orders entries by file modification time
            os.utime(efile, (emtime, emtime))

        # create test feeds in cache
        sources = config.cache_sources_directory()
        for feed in glob.glob(testfeeds):
            f=minidom.parse(feed)
            f.normalize()
            fid = f.getElementsByTagName('id')
            if not fid: continue
            ffile = filename(sources, fid[0].childNodes[0].nodeValue)
            shutil.copyfile(feed, ffile)

        # verify that exactly nine entries + one source dir were produced
        files = glob.glob(workdir+"/*")
        self.assertEqual(10, len(files))

        # verify that exactly four feeds were produced in source dir
        files = glob.glob(sources+"/*")
        self.assertEqual(4, len(files))

        # expunge...
        expungeCache()

        # verify that five entries and one source dir are left
        files = glob.glob(workdir+"/*")
        self.assertEqual(6, len(files))

        # verify that the right five entries are left
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test1,1') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test2,1') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test3,3') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test4,2') in files)
        self.assertTrue(os.path.join(workdir,
            'bzr.mfd-consult.dk,2007,venus-expunge-test4,3') in files)
|
Loading…
x
Reference in New Issue
Block a user