Add an "only-if-new" option to planet

This commit is contained in:
Sam Ruby 2006-10-19 11:55:33 -04:00
parent 31bcbd26a2
commit 7310e14ee5
2 changed files with 9 additions and 4 deletions

View File

@ -20,6 +20,7 @@ if __name__ == "__main__":
config_file = "config.ini" config_file = "config.ini"
offline = 0 offline = 0
verbose = 0 verbose = 0
only_if_new = 0
for arg in sys.argv[1:]: for arg in sys.argv[1:]:
if arg == "-h" or arg == "--help": if arg == "-h" or arg == "--help":
@ -29,12 +30,15 @@ if __name__ == "__main__":
print " -v, --verbose DEBUG level logging during update" print " -v, --verbose DEBUG level logging during update"
print " -o, --offline Update the Planet from the cache only" print " -o, --offline Update the Planet from the cache only"
print " -h, --help Display this help message and exit" print " -h, --help Display this help message and exit"
print " -n, --only-if-new Only spider new feeds"
print print
sys.exit(0) sys.exit(0)
elif arg == "-v" or arg == "--verbose": elif arg == "-v" or arg == "--verbose":
verbose = 1 verbose = 1
elif arg == "-o" or arg == "--offline": elif arg == "-o" or arg == "--offline":
offline = 1 offline = 1
elif arg == "-n" or arg == "--only-if-new":
only_if_new = 1
elif arg.startswith("-"): elif arg.startswith("-"):
print >>sys.stderr, "Unknown option:", arg print >>sys.stderr, "Unknown option:", arg
sys.exit(1) sys.exit(1)
@ -50,7 +54,7 @@ if __name__ == "__main__":
if not offline: if not offline:
from planet import spider from planet import spider
spider.spiderPlanet() spider.spiderPlanet(only_if_new=only_if_new)
from planet import splice from planet import splice
doc = splice.splice() doc = splice.splice()

View File

@ -109,7 +109,7 @@ def scrub(feed, data):
source.author_detail['name'] = \ source.author_detail['name'] = \
str(stripHtml(source.author_detail.name)) str(stripHtml(source.author_detail.name))
def spiderFeed(feed): def spiderFeed(feed, only_if_new=0):
""" Spider (fetch) a single feed """ """ Spider (fetch) a single feed """
log = planet.logger log = planet.logger
@ -119,6 +119,7 @@ def spiderFeed(feed):
os.makedirs(sources, 0700) os.makedirs(sources, 0700)
feed_source = filename(sources, feed) feed_source = filename(sources, feed)
feed_info = feedparser.parse(feed_source) feed_info = feedparser.parse(feed_source)
if feed_info.feed and only_if_new: return
if feed_info.feed.get('planet_http_status',None) == '410': return if feed_info.feed.get('planet_http_status',None) == '410': return
# read feed itself # read feed itself
@ -302,7 +303,7 @@ def spiderFeed(feed):
write(xdoc.toxml('utf-8'), filename(sources, feed)) write(xdoc.toxml('utf-8'), filename(sources, feed))
xdoc.unlink() xdoc.unlink()
def spiderPlanet(): def spiderPlanet(only_if_new = False):
""" Spider (fetch) an entire planet """ """ Spider (fetch) an entire planet """
log = planet.getLogger(config.log_level()) log = planet.getLogger(config.log_level())
planet.setTimeout(config.feed_timeout()) planet.setTimeout(config.feed_timeout())
@ -312,7 +313,7 @@ def spiderPlanet():
for feed in config.subscriptions(): for feed in config.subscriptions():
try: try:
spiderFeed(feed) spiderFeed(feed, only_if_new=only_if_new)
except Exception,e: except Exception,e:
import sys, traceback import sys, traceback
type, value, tb = sys.exc_info() type, value, tb = sys.exc_info()