From 73a5dcaaf98a22807cb68fb56f2ec05bb6e7bb2d Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Sun, 4 Nov 2007 21:00:54 -0500 Subject: [PATCH] Fine tune duplicate detection --- planet/spider.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/planet/spider.py b/planet/spider.py index cf21794..8944012 100644 --- a/planet/spider.py +++ b/planet/spider.py @@ -432,7 +432,10 @@ def spiderPlanet(only_if_new = False): 'href': feed.url, 'bozo': 0, 'status': int(feed.headers.status)}) - id = data.feed.get('id', data.href) + id = data.feed.get('id', None) + if not id and hasattr(data, 'href'): id=data.href + if not id: id=uri + if not feeds_seen.has_key(id): writeCache(uri, feed_info, data) feeds_seen[id] = uri