From a51d09ec07d49610c11dee3bcf514c0abec4aa94 Mon Sep 17 00:00:00 2001 From: Morten Frederiksen Date: Sun, 4 Mar 2007 12:00:28 +0100 Subject: [PATCH 1/2] Added expunge and preliminary test cases --- THANKS | 2 +- docs/config.html | 4 + expunge.py | 17 ++++ planet.py | 8 ++ planet/config.py | 1 + planet/expunge.py | 68 ++++++++++++++ tests/data/expunge/config.ini | 20 +++++ tests/data/expunge/test1.entry | 8 ++ tests/data/expunge/test2.entry | 11 +++ tests/data/expunge/test3a.entry | 12 +++ tests/data/expunge/test3b.entry | 12 +++ tests/data/expunge/test3c.entry | 12 +++ tests/data/expunge/test4a.entry | 12 +++ tests/data/expunge/test4b.entry | 12 +++ tests/data/expunge/test4c.entry | 12 +++ tests/data/expunge/test5.entry | 12 +++ tests/data/expunge/testfeed1.atom | 5 ++ tests/data/expunge/testfeed2.atom | 5 ++ tests/data/expunge/testfeed3.atom | 5 ++ tests/data/expunge/testfeed4.atom | 5 ++ tests/test_expunge.py | 145 ++++++++++++++++++++++++++++++ 21 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 expunge.py create mode 100644 planet/expunge.py create mode 100644 tests/data/expunge/config.ini create mode 100644 tests/data/expunge/test1.entry create mode 100644 tests/data/expunge/test2.entry create mode 100644 tests/data/expunge/test3a.entry create mode 100644 tests/data/expunge/test3b.entry create mode 100644 tests/data/expunge/test3c.entry create mode 100644 tests/data/expunge/test4a.entry create mode 100644 tests/data/expunge/test4b.entry create mode 100644 tests/data/expunge/test4c.entry create mode 100644 tests/data/expunge/test5.entry create mode 100644 tests/data/expunge/testfeed1.atom create mode 100644 tests/data/expunge/testfeed2.atom create mode 100644 tests/data/expunge/testfeed3.atom create mode 100644 tests/data/expunge/testfeed4.atom create mode 100644 tests/test_expunge.py diff --git a/THANKS b/THANKS index a4da15e..a58da71 100644 --- a/THANKS +++ b/THANKS @@ -9,7 +9,7 @@ Harry Fuecks - Pipe characters in file names, filter bug 
Eric van der Vlist - Filters to add language, category information Chris Dolan - mkdir cache; default template_dirs; fix xsltproc David Sifry - rss 2.0 xslt template based on http://atom.geekhood.net/ -Morten Fredericksen - Support WordPress LinkManager OPML +Morten Frederiksen - Support WordPress LinkManager OPML Harry Fuecks - default item date to feed date Antonio Cavedoni - Django templates diff --git a/docs/config.html b/docs/config.html index b1e6550..c6fb04e 100644 --- a/docs/config.html +++ b/docs/config.html @@ -111,6 +111,10 @@ no threads are used and spidering follows the traditional algorithm. directory to be used for an additional HTTP cache to front end the Venus cache. If specified as a relative path, it is evaluated relative to the cache_directory. +
cache_keep_entries
+
Used by expunge to determine how many entries should be
+kept for each source when expunging old entries from the cache directory.
+This may be overridden on a per subscription feed basis.

Additional options can be found in normalization level overrides.

diff --git a/expunge.py b/expunge.py new file mode 100644 index 0000000..ff5017a --- /dev/null +++ b/expunge.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +""" +Main program to run just the expunge portion of planet +""" + +import os.path +import sys +from planet import expunge, config + +if __name__ == '__main__': + + if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]): + config.load(sys.argv[1]) + expunge.expungeCache() + else: + print "Usage:" + print " python %s config.ini" % sys.argv[0] diff --git a/planet.py b/planet.py index a285f6c..c278c06 100755 --- a/planet.py +++ b/planet.py @@ -21,6 +21,7 @@ if __name__ == "__main__": offline = 0 verbose = 0 only_if_new = 0 + expunge = 0 for arg in sys.argv[1:]: if arg == "-h" or arg == "--help": @@ -31,6 +32,7 @@ if __name__ == "__main__": print " -o, --offline Update the Planet from the cache only" print " -h, --help Display this help message and exit" print " -n, --only-if-new Only spider new feeds" + print " -x, --expunge Expunge old entries from cache" print sys.exit(0) elif arg == "-v" or arg == "--verbose": @@ -39,6 +41,8 @@ if __name__ == "__main__": offline = 1 elif arg == "-n" or arg == "--only-if-new": only_if_new = 1 + elif arg == "-x" or arg == "--expunge": + expunge = 1 elif arg.startswith("-"): print >>sys.stderr, "Unknown option:", arg sys.exit(1) @@ -62,3 +66,7 @@ if __name__ == "__main__": from planet import splice doc = splice.splice() splice.apply(doc.toxml('utf-8')) + + if expunge: + from planet import expunge + expunge.expungeCache() diff --git a/planet/config.py b/planet/config.py index 669dd68..afae785 100644 --- a/planet/config.py +++ b/planet/config.py @@ -107,6 +107,7 @@ def __init__(): define_planet('spider_threads', 0) define_planet_int('feed_timeout', 20) + define_planet_int('cache_keep_entries', 10) define_planet_list('template_files') define_planet_list('bill_of_materials') diff --git a/planet/expunge.py b/planet/expunge.py new file mode 100644 index 0000000..ba6d733 --- /dev/null +++ 
b/planet/expunge.py @@ -0,0 +1,68 @@ +""" Expunge old entries from a cache of entries """ +import glob, os, planet, config, feedparser +from xml.dom import minidom +from spider import filename + +def expungeCache(): + """ Expunge old entries from a cache of entries """ + import planet + log = planet.getLogger(config.log_level(),config.log_format()) + + log.info("Determining feed subscriptions") + entry_count = {} + sources = config.cache_sources_directory() + for sub in config.subscriptions(): + data=feedparser.parse(filename(sources,sub)) + if not data.feed.has_key('id'): continue + if config.feed_options(sub).has_key('cache_keep_entries'): + entry_count[data.feed.id] = config.feed_options(sub)['cache_keep_entries'] + else: + entry_count[data.feed.id] = config.cache_keep_entries() + + log.info("Listing cached entries") + cache = config.cache_directory() + dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*") + if not os.path.isdir(file)] + dir.sort() + dir.reverse() + + for mtime,file in dir: + + try: + entry=minidom.parse(file) + # determine source of entry + entry.normalize() + sources = entry.getElementsByTagName('source') + if not sources: + # no source determined, do not delete + log.debug("No source found for %s", file) + continue + ids = sources[0].getElementsByTagName('id') + if not ids: + # feed id not found, do not delete + log.debug("No source feed id found for %s", file) + continue + if ids[0].childNodes[0].nodeValue in entry_count: + # subscribed to feed, update entry count + entry_count[ids[0].childNodes[0].nodeValue] = entry_count[ + ids[0].childNodes[0].nodeValue] - 1 + if entry_count[ids[0].childNodes[0].nodeValue] >= 0: + # maximum not reached, do not delete + log.debug("Maximum not reached for %s from %s", + file, ids[0].childNodes[0].nodeValue) + continue + else: + # maximum reached + log.debug("Removing %s, maximum reached for %s", + file, ids[0].childNodes[0].nodeValue) + else: + # not subscribed + log.debug("Removing %s, not 
subscribed to %s", + file, ids[0].childNodes[0].nodeValue) + # remove old entry + #os.unlink(file) + + except: + log.error("Error parsing %s", file) + +# end of expungeCache() diff --git a/tests/data/expunge/config.ini b/tests/data/expunge/config.ini new file mode 100644 index 0000000..8412250 --- /dev/null +++ b/tests/data/expunge/config.ini @@ -0,0 +1,20 @@ +[Planet] +name = test planet +cache_directory = tests/work/expunge/cache +cache_keep_entries = 1 + +[tests/data/expunge/testfeed1.atom] +name = no source + +[tests/data/expunge/testfeed2.atom] +name = no source id + +[tests/data/expunge/testfeed3.atom] +name = global setting + +[tests/data/expunge/testfeed4.atom] +name = local setting +cache_keep_entries = 2 + +#[tests/data/expunge/testfeed5.atom] +#name = unsubbed diff --git a/tests/data/expunge/test1.entry b/tests/data/expunge/test1.entry new file mode 100644 index 0000000..1ef50ac --- /dev/null +++ b/tests/data/expunge/test1.entry @@ -0,0 +1,8 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test1/1 + + Test 1/1 + Entry with missing source + 2007-03-01T01:01:00Z + \ No newline at end of file diff --git a/tests/data/expunge/test2.entry b/tests/data/expunge/test2.entry new file mode 100644 index 0000000..b07ae59 --- /dev/null +++ b/tests/data/expunge/test2.entry @@ -0,0 +1,11 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test2/1 + + Test 2/1 + Entry with missing source id + 2007-03-01T02:01:00Z + + Test 2/1 source + + \ No newline at end of file diff --git a/tests/data/expunge/test3a.entry b/tests/data/expunge/test3a.entry new file mode 100644 index 0000000..af85d83 --- /dev/null +++ b/tests/data/expunge/test3a.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/1 + + Test 3/1 + Entry for global setting 1 + 2007-03-01T03:01:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3 + Test 3 source + + \ No newline at end of file diff --git a/tests/data/expunge/test3b.entry b/tests/data/expunge/test3b.entry new file mode 
100644 index 0000000..195cadd --- /dev/null +++ b/tests/data/expunge/test3b.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/2 + + Test 3/2 + Entry for global setting 2 + 2007-03-01T03:02:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3 + Test 3 source + + \ No newline at end of file diff --git a/tests/data/expunge/test3c.entry b/tests/data/expunge/test3c.entry new file mode 100644 index 0000000..0f33eb0 --- /dev/null +++ b/tests/data/expunge/test3c.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/3 + + Test 3/3 + Entry for global setting 3 + 2007-03-01T03:03:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3 + Test 3 source + + \ No newline at end of file diff --git a/tests/data/expunge/test4a.entry b/tests/data/expunge/test4a.entry new file mode 100644 index 0000000..744a83c --- /dev/null +++ b/tests/data/expunge/test4a.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/1 + + Test 4/1 + Entry for local setting 1 + 2007-03-01T04:01:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4 + Test 4 source + + \ No newline at end of file diff --git a/tests/data/expunge/test4b.entry b/tests/data/expunge/test4b.entry new file mode 100644 index 0000000..4dcb6ba --- /dev/null +++ b/tests/data/expunge/test4b.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/2 + + Test 4/2 + Entry for local setting 2 + 2007-03-01T04:02:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4 + Test 4 source + + \ No newline at end of file diff --git a/tests/data/expunge/test4c.entry b/tests/data/expunge/test4c.entry new file mode 100644 index 0000000..02fb184 --- /dev/null +++ b/tests/data/expunge/test4c.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/3 + + Test 4/3 + Entry for local setting 3 + 2007-03-01T04:03:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4 + Test 4 source + + \ No newline at end of file diff --git 
a/tests/data/expunge/test5.entry b/tests/data/expunge/test5.entry new file mode 100644 index 0000000..96d338c --- /dev/null +++ b/tests/data/expunge/test5.entry @@ -0,0 +1,12 @@ + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-test5/1 + + Test 5/1 + Entry from unsubbed feed + 2007-03-01T05:01:00Z + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5 + Test 5 source + + \ No newline at end of file diff --git a/tests/data/expunge/testfeed1.atom b/tests/data/expunge/testfeed1.atom new file mode 100644 index 0000000..455803e --- /dev/null +++ b/tests/data/expunge/testfeed1.atom @@ -0,0 +1,5 @@ + + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1 + \ No newline at end of file diff --git a/tests/data/expunge/testfeed2.atom b/tests/data/expunge/testfeed2.atom new file mode 100644 index 0000000..58ae023 --- /dev/null +++ b/tests/data/expunge/testfeed2.atom @@ -0,0 +1,5 @@ + + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2 + \ No newline at end of file diff --git a/tests/data/expunge/testfeed3.atom b/tests/data/expunge/testfeed3.atom new file mode 100644 index 0000000..a8c111e --- /dev/null +++ b/tests/data/expunge/testfeed3.atom @@ -0,0 +1,5 @@ + + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3 + \ No newline at end of file diff --git a/tests/data/expunge/testfeed4.atom b/tests/data/expunge/testfeed4.atom new file mode 100644 index 0000000..10fb50d --- /dev/null +++ b/tests/data/expunge/testfeed4.atom @@ -0,0 +1,5 @@ + + + + tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4 + \ No newline at end of file diff --git a/tests/test_expunge.py b/tests/test_expunge.py new file mode 100644 index 0000000..63945c7 --- /dev/null +++ b/tests/test_expunge.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python + +#import unittest, os, glob, calendar, shutil, time +#from planet.spider import filename, spiderPlanet, writeCache +#from planet import feedparser, config +#import planet + +workdir = 'tests/work/expunge/cache' +testfeed = 'tests/data/expunge/testfeed%s.atom' 
+configfile = 'tests/data/expunge/config.ini' + +class ExpungeTest(unittest.TestCase): + def setUp(self): + # silence errors + planet.logger = None + planet.getLogger('CRITICAL',None) + + try: + os.makedirs(workdir) + except: + self.tearDown() + os.makedirs(workdir) + + def tearDown(self): + shutil.rmtree(workdir) + os.removedirs(os.path.split(workdir)[0]) + + def test_filename(self): + self.assertEqual(os.path.join('.', 'example.com,index.html'), + filename('.', 'http://example.com/index.html')) + self.assertEqual(os.path.join('.', + 'planet.intertwingly.net,2006,testfeed1,1'), + filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1')) + self.assertEqual(os.path.join('.', + '00000000-0000-0000-0000-000000000000'), + filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000')) + + # Requires Python 2.3 + try: + import encodings.idna + except: + return + self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'), + filename('.', u'http://www.\u8a79\u59c6\u65af.com/')) + + def spiderFeed(self, feed_uri): + feed_info = feedparser.parse('') + data = feedparser.parse(feed_uri) + writeCache(feed_uri, feed_info, data) + + def verify_spiderFeed(self): + files = glob.glob(workdir+"/*") + files.sort() + + # verify that exactly four files + one sources dir were produced + self.assertEqual(5, len(files)) + + # verify that the file names are as expected + self.assertTrue(os.path.join(workdir, + 'planet.intertwingly.net,2006,testfeed1,1') in files) + + # verify that the file timestamps match atom:updated + data = feedparser.parse(files[2]) + self.assertEqual(['application/atom+xml'], [link.type + for link in data.entries[0].source.links if link.rel=='self']) + self.assertEqual('one', data.entries[0].source.planet_name) + self.assertEqual('2006-01-03T00:00:00Z', data.entries[0].updated) + self.assertEqual(os.stat(files[2]).st_mtime, + calendar.timegm(data.entries[0].updated_parsed)) + + def test_spiderFeed(self): + config.load(configfile) + self.spiderFeed(testfeed % 
'1b') + self.verify_spiderFeed() + + def test_spiderUpdate(self): + config.load(configfile) + self.spiderFeed(testfeed % '1a') + self.spiderFeed(testfeed % '1b') + self.verify_spiderFeed() + + def verify_spiderPlanet(self): + files = glob.glob(workdir+"/*") + + # verify that exactly eight files + 1 source dir were produced + self.assertEqual(14, len(files)) + + # verify that the file names are as expected + self.assertTrue(os.path.join(workdir, + 'planet.intertwingly.net,2006,testfeed1,1') in files) + self.assertTrue(os.path.join(workdir, + 'planet.intertwingly.net,2006,testfeed2,1') in files) + + data = feedparser.parse(workdir + + '/planet.intertwingly.net,2006,testfeed3,1') + self.assertEqual(['application/rss+xml'], [link.type + for link in data.entries[0].source.links if link.rel=='self']) + self.assertEqual('three', data.entries[0].source.author_detail.name) + self.assertEqual('three', data.entries[0].source['planet_css-id']) + + def test_spiderPlanet(self): + config.load(configfile) + spiderPlanet() + self.verify_spiderPlanet() + + def test_spiderThreads(self): + config.load(configfile.replace('config','threaded')) + _PORT = config.parser.getint('Planet','test_port') + + log = [] + from SimpleHTTPServer import SimpleHTTPRequestHandler + class TestRequestHandler(SimpleHTTPRequestHandler): + def log_message(self, format, *args): + log.append(args) + + from threading import Thread + class TestServerThread(Thread): + def __init__(self): + self.ready = 0 + self.done = 0 + Thread.__init__(self) + def run(self): + from BaseHTTPServer import HTTPServer + httpd = HTTPServer(('',_PORT), TestRequestHandler) + self.ready = 1 + while not self.done: + httpd.handle_request() + + httpd = TestServerThread() + httpd.start() + while not httpd.ready: + time.sleep(0.1) + + try: + spiderPlanet() + finally: + httpd.done = 1 + import urllib + urllib.urlopen('http://127.0.0.1:%d/' % _PORT).read() + + status = [int(rec[1]) for rec in log if str(rec[0]).startswith('GET ')] + 
status.sort() + self.assertEqual([200,200,200,200,404], status) + + self.verify_spiderPlanet() From 806b0ee53c5be19442e5037de84d503373fd6ee3 Mon Sep 17 00:00:00 2001 From: Morten Frederiksen Date: Sun, 4 Mar 2007 14:07:16 +0100 Subject: [PATCH 2/2] Updated expunge test cases to pass... --- planet/expunge.py | 4 +- tests/data/expunge/config.ini | 10 +- tests/test_expunge.py | 174 +++++++++++----------------------- 3 files changed, 63 insertions(+), 125 deletions(-) diff --git a/planet/expunge.py b/planet/expunge.py index ba6d733..9f890b9 100644 --- a/planet/expunge.py +++ b/planet/expunge.py @@ -15,7 +15,7 @@ def expungeCache(): data=feedparser.parse(filename(sources,sub)) if not data.feed.has_key('id'): continue if config.feed_options(sub).has_key('cache_keep_entries'): - entry_count[data.feed.id] = config.feed_options(sub)['cache_keep_entries'] + entry_count[data.feed.id] = int(config.feed_options(sub)['cache_keep_entries']) else: entry_count[data.feed.id] = config.cache_keep_entries() @@ -60,7 +60,7 @@ def expungeCache(): log.debug("Removing %s, not subscribed to %s", file, ids[0].childNodes[0].nodeValue) # remove old entry - #os.unlink(file) + os.unlink(file) except: log.error("Error parsing %s", file) diff --git a/tests/data/expunge/config.ini b/tests/data/expunge/config.ini index 8412250..ff750e0 100644 --- a/tests/data/expunge/config.ini +++ b/tests/data/expunge/config.ini @@ -3,18 +3,18 @@ name = test planet cache_directory = tests/work/expunge/cache cache_keep_entries = 1 -[tests/data/expunge/testfeed1.atom] +[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1] name = no source -[tests/data/expunge/testfeed2.atom] +[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2] name = no source id -[tests/data/expunge/testfeed3.atom] +[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3] name = global setting -[tests/data/expunge/testfeed4.atom] +[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4] name = local setting cache_keep_entries = 2 
-#[tests/data/expunge/testfeed5.atom] +#[tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5] #name = unsubbed diff --git a/tests/test_expunge.py b/tests/test_expunge.py index 63945c7..fc93b52 100644 --- a/tests/test_expunge.py +++ b/tests/test_expunge.py @@ -1,12 +1,15 @@ #!/usr/bin/env python - -#import unittest, os, glob, calendar, shutil, time -#from planet.spider import filename, spiderPlanet, writeCache -#from planet import feedparser, config -#import planet +import unittest, os, glob, shutil, time +from planet.spider import filename +from planet import feedparser, config +from planet.expunge import expungeCache +from xml.dom import minidom +import planet workdir = 'tests/work/expunge/cache' -testfeed = 'tests/data/expunge/testfeed%s.atom' +sourcesdir = 'tests/work/expunge/cache/sources' +testentries = 'tests/data/expunge/test*.entry' +testfeeds = 'tests/data/expunge/test*.atom' configfile = 'tests/data/expunge/config.ini' class ExpungeTest(unittest.TestCase): @@ -16,130 +19,65 @@ class ExpungeTest(unittest.TestCase): planet.getLogger('CRITICAL',None) try: - os.makedirs(workdir) + os.makedirs(workdir) + os.makedirs(sourcesdir) except: - self.tearDown() - os.makedirs(workdir) - + self.tearDown() + os.makedirs(workdir) + os.makedirs(sourcesdir) + def tearDown(self): shutil.rmtree(workdir) os.removedirs(os.path.split(workdir)[0]) - def test_filename(self): - self.assertEqual(os.path.join('.', 'example.com,index.html'), - filename('.', 'http://example.com/index.html')) - self.assertEqual(os.path.join('.', - 'planet.intertwingly.net,2006,testfeed1,1'), - filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1')) - self.assertEqual(os.path.join('.', - '00000000-0000-0000-0000-000000000000'), - filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000')) + def test_expunge(self): + config.load(configfile) - # Requires Python 2.3 - try: - import encodings.idna - except: - return - self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'), - filename('.', 
u'http://www.\u8a79\u59c6\u65af.com/')) + # create test entries in cache with correct timestamp + for entry in glob.glob(testentries): + e=minidom.parse(entry) + e.normalize() + eid = e.getElementsByTagName('id') + efile = filename(workdir, eid[0].childNodes[0].nodeValue) + eupdated = e.getElementsByTagName('updated')[0].childNodes[0].nodeValue + emtime = time.mktime(feedparser._parse_date_w3dtf(eupdated)) + if not eid or not eupdated: continue + shutil.copyfile(entry, efile) + os.utime(efile, (emtime, emtime)) + + # create test feeds in cache + sources = config.cache_sources_directory() + for feed in glob.glob(testfeeds): + f=minidom.parse(feed) + f.normalize() + fid = f.getElementsByTagName('id') + if not fid: continue + ffile = filename(sources, fid[0].childNodes[0].nodeValue) + shutil.copyfile(feed, ffile) - def spiderFeed(self, feed_uri): - feed_info = feedparser.parse('') - data = feedparser.parse(feed_uri) - writeCache(feed_uri, feed_info, data) - - def verify_spiderFeed(self): + # verify that exactly nine entries + one source dir were produced files = glob.glob(workdir+"/*") - files.sort() + self.assertEqual(10, len(files)) - # verify that exactly four files + one sources dir were produced - self.assertEqual(5, len(files)) + # verify that exactly four feeds were produced in source dir + files = glob.glob(sources+"/*") + self.assertEqual(4, len(files)) - # verify that the file names are as expected - self.assertTrue(os.path.join(workdir, - 'planet.intertwingly.net,2006,testfeed1,1') in files) + # expunge... 
+ expungeCache() - # verify that the file timestamps match atom:updated - data = feedparser.parse(files[2]) - self.assertEqual(['application/atom+xml'], [link.type - for link in data.entries[0].source.links if link.rel=='self']) - self.assertEqual('one', data.entries[0].source.planet_name) - self.assertEqual('2006-01-03T00:00:00Z', data.entries[0].updated) - self.assertEqual(os.stat(files[2]).st_mtime, - calendar.timegm(data.entries[0].updated_parsed)) - - def test_spiderFeed(self): - config.load(configfile) - self.spiderFeed(testfeed % '1b') - self.verify_spiderFeed() - - def test_spiderUpdate(self): - config.load(configfile) - self.spiderFeed(testfeed % '1a') - self.spiderFeed(testfeed % '1b') - self.verify_spiderFeed() - - def verify_spiderPlanet(self): + # verify that five entries and one source dir are left files = glob.glob(workdir+"/*") + self.assertEqual(6, len(files)) - # verify that exactly eight files + 1 source dir were produced - self.assertEqual(14, len(files)) - - # verify that the file names are as expected + # verify that the right five entries are left self.assertTrue(os.path.join(workdir, - 'planet.intertwingly.net,2006,testfeed1,1') in files) + 'bzr.mfd-consult.dk,2007,venus-expunge-test1,1') in files) self.assertTrue(os.path.join(workdir, - 'planet.intertwingly.net,2006,testfeed2,1') in files) - - data = feedparser.parse(workdir + - '/planet.intertwingly.net,2006,testfeed3,1') - self.assertEqual(['application/rss+xml'], [link.type - for link in data.entries[0].source.links if link.rel=='self']) - self.assertEqual('three', data.entries[0].source.author_detail.name) - self.assertEqual('three', data.entries[0].source['planet_css-id']) - - def test_spiderPlanet(self): - config.load(configfile) - spiderPlanet() - self.verify_spiderPlanet() - - def test_spiderThreads(self): - config.load(configfile.replace('config','threaded')) - _PORT = config.parser.getint('Planet','test_port') - - log = [] - from SimpleHTTPServer import SimpleHTTPRequestHandler - 
class TestRequestHandler(SimpleHTTPRequestHandler): - def log_message(self, format, *args): - log.append(args) - - from threading import Thread - class TestServerThread(Thread): - def __init__(self): - self.ready = 0 - self.done = 0 - Thread.__init__(self) - def run(self): - from BaseHTTPServer import HTTPServer - httpd = HTTPServer(('',_PORT), TestRequestHandler) - self.ready = 1 - while not self.done: - httpd.handle_request() - - httpd = TestServerThread() - httpd.start() - while not httpd.ready: - time.sleep(0.1) - - try: - spiderPlanet() - finally: - httpd.done = 1 - import urllib - urllib.urlopen('http://127.0.0.1:%d/' % _PORT).read() - - status = [int(rec[1]) for rec in log if str(rec[0]).startswith('GET ')] - status.sort() - self.assertEqual([200,200,200,200,404], status) - - self.verify_spiderPlanet() + 'bzr.mfd-consult.dk,2007,venus-expunge-test2,1') in files) + self.assertTrue(os.path.join(workdir, + 'bzr.mfd-consult.dk,2007,venus-expunge-test3,3') in files) + self.assertTrue(os.path.join(workdir, + 'bzr.mfd-consult.dk,2007,venus-expunge-test4,2') in files) + self.assertTrue(os.path.join(workdir, + 'bzr.mfd-consult.dk,2007,venus-expunge-test4,3') in files)