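Fixed a bug where non-2xx responses were passed to feedparser; they are now logged as errors instead of being parsed. Also added a try/except around the cache update in httplib2 that prints the local variables and re-raises, to help debug the problem with 'content' being undefined.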
This commit is contained in:
parent
4569dba5e2
commit
681eb117f8
@ -35,6 +35,7 @@ import random
|
|||||||
import sha
|
import sha
|
||||||
import hmac
|
import hmac
|
||||||
from gettext import gettext as _
|
from gettext import gettext as _
|
||||||
|
from socket import gaierror
|
||||||
|
|
||||||
__all__ = ['Http', 'Response', 'HttpLib2Error',
|
__all__ = ['Http', 'Response', 'HttpLib2Error',
|
||||||
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
|
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
|
||||||
@ -704,13 +705,13 @@ a string that contains the response entity body.
|
|||||||
cachekey = md5.new(defrag_uri).hexdigest()
|
cachekey = md5.new(defrag_uri).hexdigest()
|
||||||
cached_value = self.cache.get(cachekey)
|
cached_value = self.cache.get(cachekey)
|
||||||
if cached_value:
|
if cached_value:
|
||||||
#try:
|
try:
|
||||||
f = StringIO.StringIO(cached_value)
|
f = StringIO.StringIO(cached_value)
|
||||||
info = rfc822.Message(f)
|
info = rfc822.Message(f)
|
||||||
content = cached_value.split('\r\n\r\n', 1)[1]
|
content = cached_value.split('\r\n\r\n', 1)[1]
|
||||||
#except:
|
except:
|
||||||
# self.cache.delete(cachekey)
|
self.cache.delete(cachekey)
|
||||||
# cachekey = None
|
cachekey = None
|
||||||
else:
|
else:
|
||||||
cachekey = None
|
cachekey = None
|
||||||
|
|
||||||
@ -769,7 +770,11 @@ a string that contains the response entity body.
|
|||||||
merged_response = Response(info)
|
merged_response = Response(info)
|
||||||
if hasattr(response, "_stale_digest"):
|
if hasattr(response, "_stale_digest"):
|
||||||
merged_response._stale_digest = response._stale_digest
|
merged_response._stale_digest = response._stale_digest
|
||||||
|
try:
|
||||||
_updateCache(headers, merged_response, content, self.cache, cachekey)
|
_updateCache(headers, merged_response, content, self.cache, cachekey)
|
||||||
|
except:
|
||||||
|
print locals()
|
||||||
|
raise
|
||||||
response = merged_response
|
response = merged_response
|
||||||
response.status = 200
|
response.status = 200
|
||||||
response.fromcache = True
|
response.fromcache = True
|
||||||
|
@ -334,6 +334,7 @@ def spiderPlanet(only_if_new = False):
|
|||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
import httplib2
|
import httplib2
|
||||||
|
from socket import gaierror
|
||||||
|
|
||||||
work_queue = Queue()
|
work_queue = Queue()
|
||||||
awaiting_parsing = Queue()
|
awaiting_parsing = Queue()
|
||||||
@ -350,8 +351,11 @@ def spiderPlanet(only_if_new = False):
|
|||||||
# is empty which will terminate the thread.
|
# is empty which will terminate the thread.
|
||||||
uri = work_queue.get(block=False)
|
uri = work_queue.get(block=False)
|
||||||
log.info("Fetching %s via %d", uri, thread_index)
|
log.info("Fetching %s via %d", uri, thread_index)
|
||||||
|
try:
|
||||||
(resp, content) = h.request(uri)
|
(resp, content) = h.request(uri)
|
||||||
awaiting_parsing.put(block=True, item=(resp, content, uri))
|
awaiting_parsing.put(block=True, item=(resp, content, uri))
|
||||||
|
except gaierror:
|
||||||
|
log.error("Fail to resolve server name %s via %d", uri, thread_index)
|
||||||
except Empty, e:
|
except Empty, e:
|
||||||
log.info("Thread %d finished", thread_index)
|
log.info("Thread %d finished", thread_index)
|
||||||
pass
|
pass
|
||||||
@ -373,8 +377,11 @@ def spiderPlanet(only_if_new = False):
|
|||||||
try:
|
try:
|
||||||
(resp_headers, content, uri) = item
|
(resp_headers, content, uri) = item
|
||||||
if not resp_headers.fromcache:
|
if not resp_headers.fromcache:
|
||||||
|
if resp_headers.status < 300:
|
||||||
log.info("Parsing pre-fetched %s", uri)
|
log.info("Parsing pre-fetched %s", uri)
|
||||||
spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers)
|
spiderFeed(uri, only_if_new=only_if_new, content=content, resp_headers=resp_headers)
|
||||||
|
else:
|
||||||
|
log.error("Status code %d from %s", resp_headers.status, uri)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
import sys, traceback
|
import sys, traceback
|
||||||
type, value, tb = sys.exc_info()
|
type, value, tb = sys.exc_info()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user