Added expunge and preliminary test cases
This commit is contained in:
parent
567eb644b8
commit
a51d09ec07
2
THANKS
2
THANKS
@ -9,7 +9,7 @@ Harry Fuecks - Pipe characters in file names, filter bug
|
|||||||
Eric van der Vlist - Filters to add language, category information
|
Eric van der Vlist - Filters to add language, category information
|
||||||
Chris Dolan - mkdir cache; default template_dirs; fix xsltproc
|
Chris Dolan - mkdir cache; default template_dirs; fix xsltproc
|
||||||
David Sifry - rss 2.0 xslt template based on http://atom.geekhood.net/
|
David Sifry - rss 2.0 xslt template based on http://atom.geekhood.net/
|
||||||
Morten Fredericksen - Support WordPress LinkManager OPML
|
Morten Frederiksen - Support WordPress LinkManager OPML
|
||||||
Harry Fuecks - default item date to feed date
|
Harry Fuecks - default item date to feed date
|
||||||
Antonio Cavedoni - Django templates
|
Antonio Cavedoni - Django templates
|
||||||
|
|
||||||
|
@ -111,6 +111,10 @@ no threads are used and spidering follows the traditional algorithm.</dd>
|
|||||||
directory to be used for an additional HTTP cache to front end the Venus
|
directory to be used for an additional HTTP cache to front end the Venus
|
||||||
cache. If specified as a relative path, it is evaluated relative to the
|
cache. If specified as a relative path, it is evaluated relative to the
|
||||||
<code>cache_directory</code>.</dd>
|
<code>cache_directory</code>.</dd>
|
||||||
|
<dt><ins>cache_keep_entries</ins></dt>
|
||||||
|
<dd>Used by <code>expunge</code> to determine how many entries should be
|
||||||
|
kept for each source when expunging old entries from the cache directory.
|
||||||
|
This may be overridden on a per-subscription basis.</dd>
|
||||||
</dl>
|
</dl>
|
||||||
<p>Additional options can be found in
|
<p>Additional options can be found in
|
||||||
<a href="normalization.html#overrides">normalization level overrides</a>.</p>
|
<a href="normalization.html#overrides">normalization level overrides</a>.</p>
|
||||||
|
17
expunge.py
Normal file
17
expunge.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""
|
||||||
|
Main program to run just the expunge portion of planet
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
from planet import expunge, config
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Expect exactly one argument: the path of an existing configuration file.
    if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
        config.load(sys.argv[1])
        expunge.expungeCache()
    else:
        # Bad invocation: report usage on stderr and exit non-zero so that
        # calling scripts and cron jobs can detect the failure.
        sys.stderr.write("Usage:\n")
        sys.stderr.write(" python %s config.ini\n" % sys.argv[0])
        sys.exit(1)
|
@ -21,6 +21,7 @@ if __name__ == "__main__":
|
|||||||
offline = 0
|
offline = 0
|
||||||
verbose = 0
|
verbose = 0
|
||||||
only_if_new = 0
|
only_if_new = 0
|
||||||
|
expunge = 0
|
||||||
|
|
||||||
for arg in sys.argv[1:]:
|
for arg in sys.argv[1:]:
|
||||||
if arg == "-h" or arg == "--help":
|
if arg == "-h" or arg == "--help":
|
||||||
@ -31,6 +32,7 @@ if __name__ == "__main__":
|
|||||||
print " -o, --offline Update the Planet from the cache only"
|
print " -o, --offline Update the Planet from the cache only"
|
||||||
print " -h, --help Display this help message and exit"
|
print " -h, --help Display this help message and exit"
|
||||||
print " -n, --only-if-new Only spider new feeds"
|
print " -n, --only-if-new Only spider new feeds"
|
||||||
|
print " -x, --expunge Expunge old entries from cache"
|
||||||
print
|
print
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
elif arg == "-v" or arg == "--verbose":
|
elif arg == "-v" or arg == "--verbose":
|
||||||
@ -39,6 +41,8 @@ if __name__ == "__main__":
|
|||||||
offline = 1
|
offline = 1
|
||||||
elif arg == "-n" or arg == "--only-if-new":
|
elif arg == "-n" or arg == "--only-if-new":
|
||||||
only_if_new = 1
|
only_if_new = 1
|
||||||
|
elif arg == "-x" or arg == "--expunge":
|
||||||
|
expunge = 1
|
||||||
elif arg.startswith("-"):
|
elif arg.startswith("-"):
|
||||||
print >>sys.stderr, "Unknown option:", arg
|
print >>sys.stderr, "Unknown option:", arg
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@ -62,3 +66,7 @@ if __name__ == "__main__":
|
|||||||
from planet import splice
|
from planet import splice
|
||||||
doc = splice.splice()
|
doc = splice.splice()
|
||||||
splice.apply(doc.toxml('utf-8'))
|
splice.apply(doc.toxml('utf-8'))
|
||||||
|
|
||||||
|
if expunge:
    # Only reached when -x/--expunge was given. The import below rebinds the
    # name ``expunge`` from the option flag to the planet.expunge module.
    from planet import expunge
    # Actually invoke the function; a bare ``expunge.expungeCache`` merely
    # evaluates the attribute and does nothing.
    expunge.expungeCache()
|
||||||
|
@ -107,6 +107,7 @@ def __init__():
|
|||||||
define_planet('spider_threads', 0)
|
define_planet('spider_threads', 0)
|
||||||
|
|
||||||
define_planet_int('feed_timeout', 20)
|
define_planet_int('feed_timeout', 20)
|
||||||
|
define_planet_int('cache_keep_entries', 10)
|
||||||
|
|
||||||
define_planet_list('template_files')
|
define_planet_list('template_files')
|
||||||
define_planet_list('bill_of_materials')
|
define_planet_list('bill_of_materials')
|
||||||
|
68
planet/expunge.py
Normal file
68
planet/expunge.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
""" Expunge old entries from a cache of entries """
|
||||||
|
import glob, os, planet, config, feedparser
|
||||||
|
from xml.dom import minidom
|
||||||
|
from spider import filename
|
||||||
|
|
||||||
|
def expungeCache():
    """ Expunge old entries from a cache of entries

    For every subscription whose cached source document carries a feed id,
    determine how many entries may be kept: the per-feed
    ``cache_keep_entries`` option if present, otherwise the global
    ``cache_keep_entries`` setting.  The plain files in the cache directory
    are then visited newest first (by modification time).  An entry is kept
    when it has no ``source`` element, when its source has no ``id``, or when
    the quota of its feed is not yet used up; entries beyond the quota and
    entries from feeds that are not subscribed to are selected for removal.

    NOTE(review): the actual ``os.unlink`` call is still commented out, so
    for now this function only logs what it would remove.
    """
    import planet
    log = planet.getLogger(config.log_level(),config.log_format())

    log.info("Determining feed subscriptions")
    entry_count = {}
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources, sub))
        if not data.feed.has_key('id'): continue
        options = config.feed_options(sub)
        if options.has_key('cache_keep_entries'):
            # Convert explicitly: the quota is decremented and compared as a
            # number below, and the per-feed value is presumably a string as
            # read from the configuration file.
            entry_count[data.feed.id] = int(options['cache_keep_entries'])
        else:
            entry_count[data.feed.id] = config.cache_keep_entries()

    log.info("Listing cached entries")
    cache = config.cache_directory()
    cached = [(os.stat(path).st_mtime, path)
              for path in glob.glob(cache + "/*")
              if not os.path.isdir(path)]
    # newest first, so the most recent entries of each feed use up the quota
    cached.sort()
    cached.reverse()

    for mtime, path in cached:

        try:
            entry = minidom.parse(path)
            # determine source of entry
            entry.normalize()
            source_nodes = entry.getElementsByTagName('source')
            if not source_nodes:
                # no source determined, do not delete
                log.debug("No source found for %s", path)
                continue
            ids = source_nodes[0].getElementsByTagName('id')
            if not ids:
                # feed id not found, do not delete
                log.debug("No source feed id found for %s", path)
                continue
            feed_id = ids[0].childNodes[0].nodeValue
            if feed_id in entry_count:
                # subscribed to feed, update entry count
                entry_count[feed_id] = entry_count[feed_id] - 1
                if entry_count[feed_id] >= 0:
                    # maximum not reached, do not delete
                    log.debug("Maximum not reached for %s from %s",
                        path, feed_id)
                    continue
                else:
                    # maximum reached
                    log.debug("Removing %s, maximum reached for %s",
                        path, feed_id)
            else:
                # not subscribed
                log.debug("Removing %s, not subscribed to %s",
                    path, feed_id)
            # remove old entry
            # NOTE(review): deletion intentionally left disabled, as in the
            # original preliminary version; enable once the tests cover it.
            #os.unlink(path)

        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are not swallowed; a problem with one cache
            # file must not abort the whole run.
            log.error("Error parsing %s", path)
|
||||||
|
|
||||||
|
# end of expungeCache()
|
20
tests/data/expunge/config.ini
Normal file
20
tests/data/expunge/config.ini
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[Planet]
|
||||||
|
name = test planet
|
||||||
|
cache_directory = tests/work/expunge/cache
|
||||||
|
cache_keep_entries = 1
|
||||||
|
|
||||||
|
[tests/data/expunge/testfeed1.atom]
|
||||||
|
name = no source
|
||||||
|
|
||||||
|
[tests/data/expunge/testfeed2.atom]
|
||||||
|
name = no source id
|
||||||
|
|
||||||
|
[tests/data/expunge/testfeed3.atom]
|
||||||
|
name = global setting
|
||||||
|
|
||||||
|
[tests/data/expunge/testfeed4.atom]
|
||||||
|
name = local setting
|
||||||
|
cache_keep_entries = 2
|
||||||
|
|
||||||
|
#[tests/data/expunge/testfeed5.atom]
|
||||||
|
#name = unsubbed
|
8
tests/data/expunge/test1.entry
Normal file
8
tests/data/expunge/test1.entry
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test1/1</id>
|
||||||
|
<link href="http://example.com/1/1"/>
|
||||||
|
<title>Test 1/1</title>
|
||||||
|
<content>Entry with missing source</content>
|
||||||
|
<updated>2007-03-01T01:01:00Z</updated>
|
||||||
|
</entry>
|
11
tests/data/expunge/test2.entry
Normal file
11
tests/data/expunge/test2.entry
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test2/1</id>
|
||||||
|
<link href="http://example.com/2/1"/>
|
||||||
|
<title>Test 2/1</title>
|
||||||
|
<content>Entry with missing source id</content>
|
||||||
|
<updated>2007-03-01T02:01:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<title>Test 2/1 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test3a.entry
Normal file
12
tests/data/expunge/test3a.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/1</id>
|
||||||
|
<link href="http://example.com/3/1"/>
|
||||||
|
<title>Test 3/1</title>
|
||||||
|
<content>Entry for global setting 1</content>
|
||||||
|
<updated>2007-03-01T03:01:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||||
|
<title>Test 3 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test3b.entry
Normal file
12
tests/data/expunge/test3b.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/2</id>
|
||||||
|
<link href="http://example.com/3/2"/>
|
||||||
|
<title>Test 3/2</title>
|
||||||
|
<content>Entry for global setting 2</content>
|
||||||
|
<updated>2007-03-01T03:02:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||||
|
<title>Test 3 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test3c.entry
Normal file
12
tests/data/expunge/test3c.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test3/3</id>
|
||||||
|
<link href="http://example.com/3/3"/>
|
||||||
|
<title>Test 3/3</title>
|
||||||
|
<content>Entry for global setting 3</content>
|
||||||
|
<updated>2007-03-01T03:03:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||||
|
<title>Test 3 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test4a.entry
Normal file
12
tests/data/expunge/test4a.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/1</id>
|
||||||
|
<link href="http://example.com/4/1"/>
|
||||||
|
<title>Test 4/1</title>
|
||||||
|
<content>Entry for local setting 1</content>
|
||||||
|
<updated>2007-03-01T04:01:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||||
|
<title>Test 4 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test4b.entry
Normal file
12
tests/data/expunge/test4b.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/2</id>
|
||||||
|
<link href="http://example.com/4/2"/>
|
||||||
|
<title>Test 4/2</title>
|
||||||
|
<content>Entry for local setting 2</content>
|
||||||
|
<updated>2007-03-01T04:02:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||||
|
<title>Test 4 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test4c.entry
Normal file
12
tests/data/expunge/test4c.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test4/3</id>
|
||||||
|
<link href="http://example.com/4/3"/>
|
||||||
|
<title>Test 4/3</title>
|
||||||
|
<content>Entry for local setting 3</content>
|
||||||
|
<updated>2007-03-01T04:03:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||||
|
<title>Test 4 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
12
tests/data/expunge/test5.entry
Normal file
12
tests/data/expunge/test5.entry
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<entry xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-test5/1</id>
|
||||||
|
<link href="http://example.com/5/1"/>
|
||||||
|
<title>Test 5/1</title>
|
||||||
|
<content>Entry from unsubbed feed</content>
|
||||||
|
<updated>2007-03-01T05:01:00Z</updated>
|
||||||
|
<source>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed5</id>
|
||||||
|
<title>Test 5 source</title>
|
||||||
|
</source>
|
||||||
|
</entry>
|
5
tests/data/expunge/testfeed1.atom
Normal file
5
tests/data/expunge/testfeed1.atom
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed1.atom"/>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed1</id>
|
||||||
|
</feed>
|
5
tests/data/expunge/testfeed2.atom
Normal file
5
tests/data/expunge/testfeed2.atom
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed2.atom"/>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed2</id>
|
||||||
|
</feed>
|
5
tests/data/expunge/testfeed3.atom
Normal file
5
tests/data/expunge/testfeed3.atom
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed3.atom"/>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed3</id>
|
||||||
|
</feed>
|
5
tests/data/expunge/testfeed4.atom
Normal file
5
tests/data/expunge/testfeed4.atom
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<link rel="self" href="http://bzr.mfd-consult.dk/venus/tests/data/expunge/testfeed4.atom"/>
|
||||||
|
<id>tag:bzr.mfd-consult.dk,2007:venus-expunge-testfeed4</id>
|
||||||
|
</feed>
|
145
tests/test_expunge.py
Normal file
145
tests/test_expunge.py
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
#import unittest, os, glob, calendar, shutil, time
|
||||||
|
#from planet.spider import filename, spiderPlanet, writeCache
|
||||||
|
#from planet import feedparser, config
|
||||||
|
#import planet
|
||||||
|
|
||||||
|
workdir = 'tests/work/expunge/cache'
|
||||||
|
testfeed = 'tests/data/expunge/testfeed%s.atom'
|
||||||
|
configfile = 'tests/data/expunge/config.ini'
|
||||||
|
|
||||||
|
class ExpungeTest(unittest.TestCase):
    """ Preliminary test cases run against the expunge test configuration.

    NOTE(review): the methods below exercise filename(), writeCache() and
    spiderPlanet() rather than expungeCache(), and refer to fixtures such as
    testfeed '1a'/'1b' and a 'threaded' configuration -- presumably
    placeholders carried over from the spider tests; confirm before relying
    on them.
    """

    def setUp(self):
        # silence errors
        planet.logger = None
        planet.getLogger('CRITICAL',None)

        try:
            os.makedirs(workdir)
        except OSError:
            # creation failed (typically left over from an earlier run):
            # clean up and start from a fresh work directory
            self.tearDown()
            os.makedirs(workdir)

    def tearDown(self):
        # remove the cache directory, then prune its now-empty parents
        shutil.rmtree(workdir)
        os.removedirs(os.path.split(workdir)[0])

    def test_filename(self):
        self.assertEqual(os.path.join('.', 'example.com,index.html'),
            filename('.', 'http://example.com/index.html'))
        self.assertEqual(os.path.join('.',
            'planet.intertwingly.net,2006,testfeed1,1'),
            filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
        self.assertEqual(os.path.join('.',
            '00000000-0000-0000-0000-000000000000'),
            filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))

        # Requires Python 2.3
        try:
            import encodings.idna
        except ImportError:
            # no IDNA codec available: skip the internationalized check
            return
        self.assertEqual(os.path.join('.', 'xn--8ws00zhy3a.com'),
            filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))

    def spiderFeed(self, feed_uri):
        """ Parse feed_uri and write the result to the cache """
        feed_info = feedparser.parse('<feed/>')
        data = feedparser.parse(feed_uri)
        writeCache(feed_uri, feed_info, data)

    def verify_spiderFeed(self):
        """ Check the cache contents after spidering a single feed """
        files = glob.glob(workdir+"/*")
        files.sort()

        # verify that exactly four files + one sources dir were produced
        self.assertEqual(5, len(files))

        # verify that the file names are as expected
        self.assertTrue(os.path.join(workdir,
            'planet.intertwingly.net,2006,testfeed1,1') in files)

        # verify that the file timestamps match atom:updated
        data = feedparser.parse(files[2])
        self.assertEqual(['application/atom+xml'], [link.type
            for link in data.entries[0].source.links if link.rel=='self'])
        self.assertEqual('one', data.entries[0].source.planet_name)
        self.assertEqual('2006-01-03T00:00:00Z', data.entries[0].updated)
        self.assertEqual(os.stat(files[2]).st_mtime,
            calendar.timegm(data.entries[0].updated_parsed))

    def test_spiderFeed(self):
        config.load(configfile)
        self.spiderFeed(testfeed % '1b')
        self.verify_spiderFeed()

    def test_spiderUpdate(self):
        config.load(configfile)
        self.spiderFeed(testfeed % '1a')
        self.spiderFeed(testfeed % '1b')
        self.verify_spiderFeed()

    def verify_spiderPlanet(self):
        """ Check the cache contents after spidering the whole planet """
        files = glob.glob(workdir+"/*")

        # verify the total number of items found in the cache directory
        # NOTE(review): an earlier comment here said "eight files + 1 source
        # dir" while the assertion expects 14 -- confirm the intended count
        self.assertEqual(14, len(files))

        # verify that the file names are as expected
        self.assertTrue(os.path.join(workdir,
            'planet.intertwingly.net,2006,testfeed1,1') in files)
        self.assertTrue(os.path.join(workdir,
            'planet.intertwingly.net,2006,testfeed2,1') in files)

        data = feedparser.parse(workdir +
            '/planet.intertwingly.net,2006,testfeed3,1')
        self.assertEqual(['application/rss+xml'], [link.type
            for link in data.entries[0].source.links if link.rel=='self'])
        self.assertEqual('three', data.entries[0].source.author_detail.name)
        self.assertEqual('three', data.entries[0].source['planet_css-id'])

    def test_spiderPlanet(self):
        config.load(configfile)
        spiderPlanet()
        self.verify_spiderPlanet()

    def test_spiderThreads(self):
        config.load(configfile.replace('config','threaded'))
        _PORT = config.parser.getint('Planet','test_port')

        # request log filled in by the handler below
        log = []
        from SimpleHTTPServer import SimpleHTTPRequestHandler
        class TestRequestHandler(SimpleHTTPRequestHandler):
            def log_message(self, format, *args):
                log.append(args)

        from threading import Thread
        class TestServerThread(Thread):
            def __init__(self):
                self.ready = 0
                self.done = 0
                Thread.__init__(self)
            def run(self):
                from BaseHTTPServer import HTTPServer
                httpd = HTTPServer(('',_PORT), TestRequestHandler)
                self.ready = 1
                while not self.done:
                    httpd.handle_request()

        httpd = TestServerThread()
        httpd.start()
        while not httpd.ready:
            time.sleep(0.1)

        try:
            spiderPlanet()
        finally:
            httpd.done = 1
            # one last request so the blocking handle_request() returns and
            # the server loop can observe the done flag
            import urllib
            urllib.urlopen('http://127.0.0.1:%d/' % _PORT).read()

        status = [int(rec[1]) for rec in log if str(rec[0]).startswith('GET ')]
        status.sort()
        self.assertEqual([200,200,200,200,404], status)

        self.verify_spiderPlanet()
|
Loading…
x
Reference in New Issue
Block a user