Add an "only-if-new" option to planet
This commit is contained in:
parent
31bcbd26a2
commit
7310e14ee5
@ -20,6 +20,7 @@ if __name__ == "__main__":
|
|||||||
config_file = "config.ini"
|
config_file = "config.ini"
|
||||||
offline = 0
|
offline = 0
|
||||||
verbose = 0
|
verbose = 0
|
||||||
|
only_if_new = 0
|
||||||
|
|
||||||
for arg in sys.argv[1:]:
|
for arg in sys.argv[1:]:
|
||||||
if arg == "-h" or arg == "--help":
|
if arg == "-h" or arg == "--help":
|
||||||
@ -29,12 +30,15 @@ if __name__ == "__main__":
|
|||||||
print " -v, --verbose DEBUG level logging during update"
|
print " -v, --verbose DEBUG level logging during update"
|
||||||
print " -o, --offline Update the Planet from the cache only"
|
print " -o, --offline Update the Planet from the cache only"
|
||||||
print " -h, --help Display this help message and exit"
|
print " -h, --help Display this help message and exit"
|
||||||
|
print " -n, --only-if-new Only spider new feeds"
|
||||||
print
|
print
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
elif arg == "-v" or arg == "--verbose":
|
elif arg == "-v" or arg == "--verbose":
|
||||||
verbose = 1
|
verbose = 1
|
||||||
elif arg == "-o" or arg == "--offline":
|
elif arg == "-o" or arg == "--offline":
|
||||||
offline = 1
|
offline = 1
|
||||||
|
elif arg == "-n" or arg == "--only-if-new":
|
||||||
|
only_if_new = 1
|
||||||
elif arg.startswith("-"):
|
elif arg.startswith("-"):
|
||||||
print >>sys.stderr, "Unknown option:", arg
|
print >>sys.stderr, "Unknown option:", arg
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@ -50,7 +54,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
if not offline:
|
if not offline:
|
||||||
from planet import spider
|
from planet import spider
|
||||||
spider.spiderPlanet()
|
spider.spiderPlanet(only_if_new=only_if_new)
|
||||||
|
|
||||||
from planet import splice
|
from planet import splice
|
||||||
doc = splice.splice()
|
doc = splice.splice()
|
||||||
|
@ -109,7 +109,7 @@ def scrub(feed, data):
|
|||||||
source.author_detail['name'] = \
|
source.author_detail['name'] = \
|
||||||
str(stripHtml(source.author_detail.name))
|
str(stripHtml(source.author_detail.name))
|
||||||
|
|
||||||
def spiderFeed(feed):
|
def spiderFeed(feed, only_if_new=0):
|
||||||
""" Spider (fetch) a single feed """
|
""" Spider (fetch) a single feed """
|
||||||
log = planet.logger
|
log = planet.logger
|
||||||
|
|
||||||
@ -119,6 +119,7 @@ def spiderFeed(feed):
|
|||||||
os.makedirs(sources, 0700)
|
os.makedirs(sources, 0700)
|
||||||
feed_source = filename(sources, feed)
|
feed_source = filename(sources, feed)
|
||||||
feed_info = feedparser.parse(feed_source)
|
feed_info = feedparser.parse(feed_source)
|
||||||
|
if feed_info.feed and only_if_new: return
|
||||||
if feed_info.feed.get('planet_http_status',None) == '410': return
|
if feed_info.feed.get('planet_http_status',None) == '410': return
|
||||||
|
|
||||||
# read feed itself
|
# read feed itself
|
||||||
@ -302,7 +303,7 @@ def spiderFeed(feed):
|
|||||||
write(xdoc.toxml('utf-8'), filename(sources, feed))
|
write(xdoc.toxml('utf-8'), filename(sources, feed))
|
||||||
xdoc.unlink()
|
xdoc.unlink()
|
||||||
|
|
||||||
def spiderPlanet():
|
def spiderPlanet(only_if_new = False):
|
||||||
""" Spider (fetch) an entire planet """
|
""" Spider (fetch) an entire planet """
|
||||||
log = planet.getLogger(config.log_level())
|
log = planet.getLogger(config.log_level())
|
||||||
planet.setTimeout(config.feed_timeout())
|
planet.setTimeout(config.feed_timeout())
|
||||||
@ -312,7 +313,7 @@ def spiderPlanet():
|
|||||||
|
|
||||||
for feed in config.subscriptions():
|
for feed in config.subscriptions():
|
||||||
try:
|
try:
|
||||||
spiderFeed(feed)
|
spiderFeed(feed, only_if_new=only_if_new)
|
||||||
except Exception,e:
|
except Exception,e:
|
||||||
import sys, traceback
|
import sys, traceback
|
||||||
type, value, tb = sys.exc_info()
|
type, value, tb = sys.exc_info()
|
||||||
|
Loading…
Reference in New Issue
Block a user