Updated to the latest httplib2. The 'content-encoding' header is now deleted from the httplib2 response before it is passed to feedparser.

2006-11-05 22:48:30 -05:00 · 2006-11-05 22:48:30 -05:00 · 56a447e1be
commit 56a447e1be
parent 4b9e85e4f7
2 changed files with 7 additions and 2 deletions
--- a/planet/httplib2/init.py
+++ b/planet/httplib2/init.py
@ -16,7 +16,7 @@ __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
    "Jonathan Feinberg",
    "Blair Zajac"]
 __license__ = "MIT"
-__version__ = "$Rev: 204 $"
+__version__ = "$Rev: 208 $"

 import re 
 import md5
@ -232,8 +232,10 @@ def _decompressContent(response, new_content):
    try:
        if response.get('content-encoding', None) == 'gzip':
            content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
+            response['content-length'] = str(len(content))
        if response.get('content-encoding', None) == 'deflate':
            content = zlib.decompress(content)
+            response['content-length'] = str(len(content))
    except:
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'))
@ -833,4 +835,3 @@ class Response(dict):
            raise AttributeError, name 


-
--- a/planet/spider.py
+++ b/planet/spider.py
@ -141,9 +141,13 @@ def spiderFeed(feed, only_if_new=0, content=None, resp_headers=None):

    # read feed itself
    if content:
+        # httplib2 was used to get the content, so prepare a 
+        # proper object to pass to feedparser.
        f = StringIO(content) 
        setattr(f, 'url', feed)
        if resp_headers:
+            if resp_headers.has_key('content-encoding'):
+                del resp_headers['content-encoding']
            setattr(f, 'headers', resp_headers)
        data = feedparser.parse(f)
    else: