""" Expunge old entries from a cache of entries """
|
|
import glob, os, planet, config, feedparser
|
|
from xml.dom import minidom
|
|
from spider import filename
|
|
|
|
def expungeCache():
    """ Expunge old entries from a cache of entries

    Works in two passes:

    1. For every configured subscription, parse its cached source feed and
       record how many entries may be kept for that feed id.  The per-feed
       'cache_keep_entries' option is used when present, otherwise the
       global config.cache_keep_entries() value.
    2. Walk the files in the cache directory from newest to oldest
       (by mtime).  An entry is deleted when its source feed id is not
       among the recorded subscriptions, or when the keep budget of its
       feed has already been used up by newer entries.

    Entries whose source feed cannot be determined are never deleted.
    Files that cannot be parsed or removed are logged and skipped.
    Returns nothing.
    """
    log = planet.logger

    log.info("Determining feed subscriptions")
    entry_count = {}
    sources_dir = config.cache_sources_directory()
    for sub in config.subscriptions():
        data = feedparser.parse(filename(sources_dir, sub))
        # feeds without an id cannot be matched against cached entries
        if 'id' not in data.feed:
            continue
        options = config.feed_options(sub)
        if 'cache_keep_entries' in options:
            entry_count[data.feed.id] = int(options['cache_keep_entries'])
        else:
            entry_count[data.feed.id] = config.cache_keep_entries()

    log.info("Listing cached entries")
    cache = config.cache_directory()
    cached = [(os.stat(path).st_mtime, path)
              for path in glob.glob(cache + "/*")
              if not os.path.isdir(path)]
    # newest first, so that the keep budget is spent on the most recent
    # entries and whatever is left over is the oldest
    cached.sort()
    cached.reverse()

    for _mtime, path in cached:
        # only parsing is guarded here; keyboard interrupts and system
        # exits must still be able to stop the run
        try:
            entry = minidom.parse(path)
            # merge adjacent text nodes so the first child holds the full id
            entry.normalize()
            source_nodes = entry.getElementsByTagName('source')
            id_nodes = None
            if source_nodes:
                id_nodes = source_nodes[0].getElementsByTagName('id')
        except Exception:
            log.error("Error parsing %s", path)
            continue

        if not source_nodes:
            # no source determined, do not delete
            log.debug("No source found for %s", path)
            continue
        if not id_nodes or not id_nodes[0].childNodes:
            # feed id not found (missing or empty element), do not delete
            log.debug("No source feed id found for %s", path)
            continue

        feed_id = id_nodes[0].childNodes[0].nodeValue
        if feed_id in entry_count:
            # subscribed to feed, update entry count
            entry_count[feed_id] = entry_count[feed_id] - 1
            if entry_count[feed_id] >= 0:
                # maximum not reached, do not delete
                log.debug("Maximum not reached for %s from %s",
                          path, feed_id)
                continue
            # maximum reached
            log.debug("Removing %s, maximum reached for %s", path, feed_id)
        else:
            # not subscribed
            log.debug("Removing %s, not subscribed to %s", path, feed_id)

        # remove old entry; a failure here is not a parse error, so it is
        # reported separately and does not abort the remaining files
        try:
            os.unlink(path)
        except OSError:
            log.error("Error removing %s", path)

# end of expungeCache()
|