From 7310e14ee5f099f8d87b401283782ba2e9430cab Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Thu, 19 Oct 2006 11:55:33 -0400 Subject: [PATCH] Add an "only-if-new" option to planet --- planet.py | 6 +++++- planet/spider.py | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/planet.py b/planet.py index d3346e1..d4d5976 100755 --- a/planet.py +++ b/planet.py @@ -20,6 +20,7 @@ if __name__ == "__main__": config_file = "config.ini" offline = 0 verbose = 0 + only_if_new = 0 for arg in sys.argv[1:]: if arg == "-h" or arg == "--help": @@ -29,12 +30,15 @@ if __name__ == "__main__": print " -v, --verbose DEBUG level logging during update" print " -o, --offline Update the Planet from the cache only" print " -h, --help Display this help message and exit" + print " -n, --only-if-new Only spider new feeds" print sys.exit(0) elif arg == "-v" or arg == "--verbose": verbose = 1 elif arg == "-o" or arg == "--offline": offline = 1 + elif arg == "-n" or arg == "--only-if-new": + only_if_new = 1 elif arg.startswith("-"): print >>sys.stderr, "Unknown option:", arg sys.exit(1) @@ -50,7 +54,7 @@ if __name__ == "__main__": if not offline: from planet import spider - spider.spiderPlanet() + spider.spiderPlanet(only_if_new=only_if_new) from planet import splice doc = splice.splice() diff --git a/planet/spider.py b/planet/spider.py index 974b82d..a098aee 100644 --- a/planet/spider.py +++ b/planet/spider.py @@ -109,7 +109,7 @@ def scrub(feed, data): source.author_detail['name'] = \ str(stripHtml(source.author_detail.name)) -def spiderFeed(feed): +def spiderFeed(feed, only_if_new=0): """ Spider (fetch) a single feed """ log = planet.logger @@ -119,6 +119,7 @@ def spiderFeed(feed): os.makedirs(sources, 0700) feed_source = filename(sources, feed) feed_info = feedparser.parse(feed_source) + if feed_info.feed and only_if_new: return if feed_info.feed.get('planet_http_status',None) == '410': return # read feed itself @@ -302,7 +303,7 @@ def spiderFeed(feed): write(xdoc.toxml('utf-8'), filename(sources, feed)) xdoc.unlink() -def spiderPlanet(): +def spiderPlanet(only_if_new = False): """ Spider (fetch) an entire planet """ log = planet.getLogger(config.log_level()) planet.setTimeout(config.feed_timeout()) @@ -312,7 +313,7 @@ def spiderPlanet(): for feed in config.subscriptions(): try: - spiderFeed(feed) + spiderFeed(feed, only_if_new=only_if_new) except Exception,e: import sys, traceback type, value, tb = sys.exc_info()