Duplicate feed and no data reporting
This commit is contained in:
parent
73a5dcaaf9
commit
7e1542bf8e
@ -80,16 +80,23 @@ def writeCache(feed_uri, feed_info, data):
|
||||
|
||||
# process based on the HTTP status code
|
||||
if data.status == 200 and data.has_key("url"):
|
||||
data.feed['planet_http_location'] = data.url
|
||||
if feed_uri == data.url:
|
||||
feed_info.feed['planet_http_location'] = data.url
|
||||
if data.has_key("entries") and len(data.entries) == 0:
|
||||
log.warning("No data %s", feed_uri)
|
||||
feed_info.feed['planet_message'] = 'no data'
|
||||
elif feed_uri == data.url:
|
||||
log.info("Updating feed %s", feed_uri)
|
||||
else:
|
||||
log.info("Updating feed %s @ %s", feed_uri, data.url)
|
||||
elif data.status == 301 and data.has_key("entries") and len(data.entries)>0:
|
||||
log.warning("Feed has moved from <%s> to <%s>", feed_uri, data.url)
|
||||
data.feed['planet_http_location'] = data.url
|
||||
elif data.status == 304:
|
||||
elif data.status == 304 and data.has_key("url"):
|
||||
feed_info.feed['planet_http_location'] = data.url
|
||||
if feed_uri == data.url:
|
||||
log.info("Feed %s unchanged", feed_uri)
|
||||
else:
|
||||
log.info("Feed %s unchanged @ %s", feed_uri, data.url)
|
||||
|
||||
if not feed_info.feed.has_key('planet_message'):
|
||||
if feed_info.feed.has_key('planet_updated'):
|
||||
@ -99,6 +106,8 @@ def writeCache(feed_uri, feed_info, data):
|
||||
else:
|
||||
if feed_info.feed.planet_message.startswith("no activity in"):
|
||||
return
|
||||
if not feed_info.feed.planet_message.startswith("duplicate") and \
|
||||
not feed_info.feed.planet_message.startswith("no data"):
|
||||
del feed_info.feed['planet_message']
|
||||
|
||||
elif data.status == 410:
|
||||
@ -432,16 +441,31 @@ def spiderPlanet(only_if_new = False):
|
||||
'href': feed.url, 'bozo': 0,
|
||||
'status': int(feed.headers.status)})
|
||||
|
||||
# duplicate feed?
|
||||
id = data.feed.get('id', None)
|
||||
if not id and hasattr(data, 'href'): id=data.href
|
||||
if not id: id=uri
|
||||
if not id: id = feed_info.feed.get('id', None)
|
||||
|
||||
if not feeds_seen.has_key(id):
|
||||
writeCache(uri, feed_info, data)
|
||||
feeds_seen[id] = uri
|
||||
else:
|
||||
href=uri
|
||||
if data.has_key('href'): href=data.href
|
||||
|
||||
duplicate = None
|
||||
if id and id in feeds_seen:
|
||||
duplicate = id
|
||||
elif href and href in feeds_seen:
|
||||
duplicate = href
|
||||
|
||||
if duplicate:
|
||||
feed_info.feed['planet_message'] = \
|
||||
'duplicate subscription: ' + feeds_seen[duplicate]
|
||||
log.warn('Duplicate subscription: %s and %s' %
|
||||
(uri, feeds_seen[id]))
|
||||
(uri, feeds_seen[duplicate]))
|
||||
if href: feed_info.feed['planet_http_location'] = href
|
||||
|
||||
if id: feeds_seen[id] = uri
|
||||
if href: feeds_seen[href] = uri
|
||||
|
||||
# complete processing for the feed
|
||||
writeCache(uri, feed_info, data)
|
||||
|
||||
except Exception, e:
|
||||
import sys, traceback
|
||||
|
Loading…
x
Reference in New Issue
Block a user