Fixed a bug where non-2xx responses were passed to feedparser. Also added a try/except to help debug the problem with 'content' being undefined in httplib2.

Joe Gregorio 2006-11-04 16:58:03 -05:00
parent 4569dba5e2
commit 681eb117f8
2 changed files with 24 additions and 12 deletions

httplib2/__init__.py

@@ -35,6 +35,7 @@ import random
 import sha
 import hmac
 from gettext import gettext as _
+from socket import gaierror
 
 __all__ = ['Http', 'Response', 'HttpLib2Error',
     'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
@@ -704,13 +705,13 @@ a string that contains the response entity body.
         cachekey = md5.new(defrag_uri).hexdigest()
         cached_value = self.cache.get(cachekey)
         if cached_value:
-            #try:
-            f = StringIO.StringIO(cached_value)
-            info = rfc822.Message(f)
-            content = cached_value.split('\r\n\r\n', 1)[1]
-            #except:
-            #    self.cache.delete(cachekey)
-            #    cachekey = None
+            try:
+                f = StringIO.StringIO(cached_value)
+                info = rfc822.Message(f)
+                content = cached_value.split('\r\n\r\n', 1)[1]
+            except:
+                self.cache.delete(cachekey)
+                cachekey = None
         else:
             cachekey = None
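
Why the re-enabled try/except matters: httplib2 stores a cache entry as the raw response headers, a blank line, then the entity body, so a truncated entry has no '\r\n\r\n' separator and the [1] index raises. A minimal Python 2 sketch of that failure mode (the sample value is made up); the handler above now evicts such an entry instead of crashing:

cached_value = 'status: 200\r\ncontent-type: text/plain'  # truncated: no '\r\n\r\n'
try:
    content = cached_value.split('\r\n\r\n', 1)[1]
except IndexError:
    print 'corrupt cache entry -- evict and refetch'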
@@ -769,7 +770,11 @@ a string that contains the response entity body.
                 merged_response = Response(info)
                 if hasattr(response, "_stale_digest"):
                     merged_response._stale_digest = response._stale_digest
-                _updateCache(headers, merged_response, content, self.cache, cachekey)
+                try:
+                    _updateCache(headers, merged_response, content, self.cache, cachekey)
+                except:
+                    print locals()
+                    raise
                 response = merged_response
                 response.status = 200
                 response.fromcache = True
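
The try/except around _updateCache is debugging scaffolding for the "'content' undefined" report: it dumps every local variable at the point of failure and then re-raises so the original traceback still surfaces. A hypothetical Python 2 reproduction of the pattern (update_cache here is a stand-in, not the real _updateCache):

def update_cache():
    # simulate the reported failure
    raise NameError("name 'content' is not defined")

try:
    update_cache()
except:
    print locals()  # show what was in scope when the call blew up
    raise           # re-raise so the error is still reported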

planet/spider.py

@@ -334,6 +334,7 @@ def spiderPlanet(only_if_new = False):
     from Queue import Queue, Empty
     from threading import Thread
     import httplib2
+    from socket import gaierror
 
     work_queue = Queue()
     awaiting_parsing = Queue()
@@ -350,8 +351,11 @@ def spiderPlanet(only_if_new = False):
                 # is empty which will terminate the thread.
                 uri = work_queue.get(block=False)
                 log.info("Fetching %s via %d", uri, thread_index)
-                (resp, content) = h.request(uri)
-                awaiting_parsing.put(block=True, item=(resp, content, uri))
+                try:
+                    (resp, content) = h.request(uri)
+                    awaiting_parsing.put(block=True, item=(resp, content, uri))
+                except gaierror:
+                    log.error("Failed to resolve server name %s via %d", uri, thread_index)
             except Empty, e:
                 log.info("Thread %d finished", thread_index)
                 pass
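
socket.gaierror is the exception getaddrinfo raises when a hostname cannot be resolved, and it propagates out of h.request(), so without the handler above one dead DNS name would kill the whole worker thread. A quick Python 2 demonstration (the hostname is made up; the .invalid TLD is reserved and never resolves):

from socket import gaierror, getaddrinfo
try:
    getaddrinfo('feeds.example.invalid', 80)
except gaierror, e:
    print 'resolution failed:', e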
@@ -373,8 +377,11 @@ def spiderPlanet(only_if_new = False):
         try:
             (resp_headers, content, uri) = item
             if not resp_headers.fromcache:
-                log.info("Parsing pre-fetched %s", uri)
-                spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers)
+                if resp_headers.status < 300:
+                    log.info("Parsing pre-fetched %s", uri)
+                    spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers)
+                else:
+                    log.error("Status code %d from %s", resp_headers.status, uri)
         except Exception, e:
             import sys, traceback
             type, value, tb = sys.exc_info()
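
This last hunk is the feedparser fix from the commit message: cache hits are already filtered out by the fromcache check, and httplib2 follows redirects itself, so any status still >= 300 here is an error (or an unhandled 3xx) that should be logged rather than parsed. A minimal sketch of the gate, using a stub in place of the httplib2 response object:

class StubResponse:
    # stand-in for the httplib2 response used above
    def __init__(self, status, fromcache=False):
        self.status = status
        self.fromcache = fromcache

for resp in [StubResponse(200), StubResponse(404), StubResponse(500)]:
    if resp.status < 300:
        print 'parse feed (status %d)' % resp.status
    else:
        print 'log error (status %d)' % resp.status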