diff --git a/planet/httplib2/__init__.py b/planet/httplib2/__init__.py index 83421b4..73e9bf7 100644 --- a/planet/httplib2/__init__.py +++ b/planet/httplib2/__init__.py @@ -35,6 +35,7 @@ import random import sha import hmac from gettext import gettext as _ +from socket import gaierror __all__ = ['Http', 'Response', 'HttpLib2Error', 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', @@ -704,13 +705,13 @@ a string that contains the response entity body. cachekey = md5.new(defrag_uri).hexdigest() cached_value = self.cache.get(cachekey) if cached_value: - #try: - f = StringIO.StringIO(cached_value) - info = rfc822.Message(f) - content = cached_value.split('\r\n\r\n', 1)[1] - #except: - # self.cache.delete(cachekey) - # cachekey = None + try: + f = StringIO.StringIO(cached_value) + info = rfc822.Message(f) + content = cached_value.split('\r\n\r\n', 1)[1] + except: + self.cache.delete(cachekey) + cachekey = None else: cachekey = None @@ -769,7 +770,11 @@ a string that contains the response entity body. merged_response = Response(info) if hasattr(response, "_stale_digest"): merged_response._stale_digest = response._stale_digest - _updateCache(headers, merged_response, content, self.cache, cachekey) + try: + _updateCache(headers, merged_response, content, self.cache, cachekey) + except: + print locals() + raise response = merged_response response.status = 200 response.fromcache = True diff --git a/planet/spider.py b/planet/spider.py index 3bf0d7b..41b2d57 100644 --- a/planet/spider.py +++ b/planet/spider.py @@ -334,6 +334,7 @@ def spiderPlanet(only_if_new = False): from Queue import Queue, Empty from threading import Thread import httplib2 + from socket import gaierror work_queue = Queue() awaiting_parsing = Queue() @@ -350,8 +351,11 @@ def spiderPlanet(only_if_new = False): # is empty which will terminate the thread. 
uri = work_queue.get(block=False) log.info("Fetching %s via %d", uri, thread_index) - (resp, content) = h.request(uri) - awaiting_parsing.put(block=True, item=(resp, content, uri)) + try: + (resp, content) = h.request(uri) + awaiting_parsing.put(block=True, item=(resp, content, uri)) + except gaierror: + log.error("Failed to resolve server name %s via %d", uri, thread_index) except Empty, e: log.info("Thread %d finished", thread_index) pass @@ -373,8 +377,11 @@ def spiderPlanet(only_if_new = False): try: (resp_headers, content, uri) = item if not resp_headers.fromcache: - log.info("Parsing pre-fetched %s", uri) - spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers) + if resp_headers.status < 300: + log.info("Parsing pre-fetched %s", uri) + spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers) + else: + log.error("Status code %d from %s", resp_headers.status, uri) except Exception, e: import sys, traceback type, value, tb = sys.exc_info()