Fixed a bug where non-2xx responses were passed to feedparser. Also added a try/except to help debug the problem of 'content' being undefined in httplib2.
This commit is contained in:
parent
4569dba5e2
commit
681eb117f8
@ -35,6 +35,7 @@ import random
|
||||
import sha
|
||||
import hmac
|
||||
from gettext import gettext as _
|
||||
from socket import gaierror
|
||||
|
||||
__all__ = ['Http', 'Response', 'HttpLib2Error',
|
||||
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
|
||||
@ -704,13 +705,13 @@ a string that contains the response entity body.
|
||||
cachekey = md5.new(defrag_uri).hexdigest()
|
||||
cached_value = self.cache.get(cachekey)
|
||||
if cached_value:
|
||||
#try:
|
||||
try:
|
||||
f = StringIO.StringIO(cached_value)
|
||||
info = rfc822.Message(f)
|
||||
content = cached_value.split('\r\n\r\n', 1)[1]
|
||||
#except:
|
||||
# self.cache.delete(cachekey)
|
||||
# cachekey = None
|
||||
except:
|
||||
self.cache.delete(cachekey)
|
||||
cachekey = None
|
||||
else:
|
||||
cachekey = None
|
||||
|
||||
@ -769,7 +770,11 @@ a string that contains the response entity body.
|
||||
merged_response = Response(info)
|
||||
if hasattr(response, "_stale_digest"):
|
||||
merged_response._stale_digest = response._stale_digest
|
||||
try:
|
||||
_updateCache(headers, merged_response, content, self.cache, cachekey)
|
||||
except:
|
||||
print locals()
|
||||
raise
|
||||
response = merged_response
|
||||
response.status = 200
|
||||
response.fromcache = True
|
||||
|
@ -334,6 +334,7 @@ def spiderPlanet(only_if_new = False):
|
||||
from Queue import Queue, Empty
|
||||
from threading import Thread
|
||||
import httplib2
|
||||
from socket import gaierror
|
||||
|
||||
work_queue = Queue()
|
||||
awaiting_parsing = Queue()
|
||||
@ -350,8 +351,11 @@ def spiderPlanet(only_if_new = False):
|
||||
# is empty which will terminate the thread.
|
||||
uri = work_queue.get(block=False)
|
||||
log.info("Fetching %s via %d", uri, thread_index)
|
||||
try:
|
||||
(resp, content) = h.request(uri)
|
||||
awaiting_parsing.put(block=True, item=(resp, content, uri))
|
||||
except gaierror:
|
||||
log.error("Fail to resolve server name %s via %d", uri, thread_index)
|
||||
except Empty, e:
|
||||
log.info("Thread %d finished", thread_index)
|
||||
pass
|
||||
@ -373,8 +377,11 @@ def spiderPlanet(only_if_new = False):
|
||||
try:
|
||||
(resp_headers, content, uri) = item
|
||||
if not resp_headers.fromcache:
|
||||
if resp_headers.status < 300:
|
||||
log.info("Parsing pre-fetched %s", uri)
|
||||
spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers)
|
||||
else:
|
||||
log.error("Status code %d from %s", resp_headers.status, uri)
|
||||
except Exception, e:
|
||||
import sys, traceback
|
||||
type, value, tb = sys.exc_info()
|
||||
|
Loading…
x
Reference in New Issue
Block a user