Remove deprecation warnings (and update httplib2)

Sam Ruby 2009-09-09 09:20:15 -04:00
parent 51e17650df
commit 63fa05e556
3 changed files with 441 additions and 175 deletions
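Every file gets the same compatibility shim: prefer hashlib (new in Python 2.5) and fall back to the md5/sha modules that Python 2.6 deprecates. A minimal sketch of the pattern applied throughout this commit:

# Prefer hashlib (Python 2.5+); fall back to the modules that
# trigger DeprecationWarning on Python 2.6.
try:
    from hashlib import md5, sha1 as sha
except ImportError:
    from md5 import new as md5
    from sha import new as sha

digest = md5('any string').hexdigest()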

View File

@ -13,13 +13,18 @@ well formed XHTML.
Todo:
* extension elements
"""
import re, time, md5, sgmllib
import re, time, sgmllib
from xml.sax.saxutils import escape
from xml.dom import minidom, Node
from html5lib import liberalxmlparser
from html5lib.treebuilders import dom
import planet, config
try:
from hashlib import md5
except ImportError:
from md5 import new as md5
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
def createTextElement(parent, name, value):
@ -68,14 +73,14 @@ def id(xentry, entry):
entry_id = entry.link
elif entry.has_key("title") and entry.title:
entry_id = (entry.title_detail.base + "/" +
md5.new(entry.title).hexdigest())
md5(entry.title).hexdigest())
elif entry.has_key("summary") and entry.summary:
entry_id = (entry.summary_detail.base + "/" +
md5.new(entry.summary).hexdigest())
md5(entry.summary).hexdigest())
elif entry.has_key("content") and entry.content:
entry_id = (entry.content[0].base + "/" +
md5.new(entry.content[0].value).hexdigest())
md5(entry.content[0].value).hexdigest())
else:
return

View File

@ -10,6 +10,11 @@ from xml.dom import minidom
import planet, config, feedparser, reconstitute, shell, socket, scrub
from StringIO import StringIO
try:
from hashlib import md5
except ImportError:
from md5 import new as md5
# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
re_slash = re.compile(r'[?/:|]+')
@ -44,9 +49,8 @@ def filename(directory, filename):
parts=filename.split(',')
for i in range(len(parts),0,-1):
if len(','.join(parts[:i])) < 220:
import md5
filename = ','.join(parts[:i]) + ',' + \
md5.new(','.join(parts[i:])).hexdigest()
md5(','.join(parts[i:])).hexdigest()
break
return os.path.join(directory, filename)
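The loop above keeps as many comma-separated parts of the name as fit in 220 characters and replaces the remainder with an MD5 digest, so cache filenames stay within filesystem limits. A standalone sketch of the same idea (the helper name shorten is hypothetical):

try:
    from hashlib import md5
except ImportError:
    from md5 import new as md5

def shorten(name, limit=220):
    # Hypothetical standalone version of the loop above; assumes the
    # caller has already decided the name is too long.
    parts = name.split(',')
    for i in range(len(parts), 0, -1):
        if len(','.join(parts[:i])) < limit:
            return ','.join(parts[:i]) + ',' + md5(','.join(parts[i:])).hexdigest()
    return name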
@ -277,7 +281,7 @@ def writeCache(feed_uri, feed_info, data):
xdoc.unlink()
def httpThread(thread_index, input_queue, output_queue, log):
import httplib2, md5
import httplib2
from httplib import BadStatusLine
h = httplib2.Http(config.http_cache_directory())
@ -312,7 +316,7 @@ def httpThread(thread_index, input_queue, output_queue, log):
(resp, content) = h.request(idna, 'GET', headers=headers)
# unchanged detection
resp['-content-hash'] = md5.new(content or '').hexdigest()
resp['-content-hash'] = md5(content or '').hexdigest()
if resp.status == 200:
if resp.fromcache:
resp.status = 304
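The '-content-hash' pseudo-header records an MD5 of the body so the spider can detect unchanged feeds even when the server ignores conditional GET. A sketch of the idea (previous_hash, the digest stored with the cached copy, is hypothetical here; the actual comparison happens elsewhere in the spider):

new_hash = md5(content or '').hexdigest()
if new_hash == previous_hash:
    resp.status = 304  # identical body: treat like Not Modified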

View File

@ -7,6 +7,9 @@ to conserve bandwidth.
Requires Python 2.3 or later
Changelog:
2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
"""
__author__ = "Joe Gregorio (joe@bitworking.org)"
@ -19,14 +22,14 @@ __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
"Sam Ruby",
"Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "$Rev: 227 $"
__version__ = "$Rev$"
import re
import sys
import md5
import email
import email.Utils
import email.Message
import email.FeedParser
import StringIO
import gzip
import zlib
@ -38,10 +41,32 @@ import copy
import calendar
import time
import random
import sha
# remove deprecation warning in python2.6
try:
from hashlib import sha1 as _sha, md5 as _md5
except ImportError:
import sha
import md5
_sha = sha.new
_md5 = md5.new
import hmac
from gettext import gettext as _
from socket import gaierror
import socket
try:
import socks
except ImportError:
socks = None
# Build the appropriate socket wrapper for ssl
try:
import ssl # python 2.6
_ssl_wrap_socket = ssl.wrap_socket
except ImportError:
def _ssl_wrap_socket(sock, key_file, cert_file):
ssl_sock = socket.ssl(sock, key_file, cert_file)
return httplib.FakeSocket(sock, ssl_sock)
if sys.version_info >= (2,3):
from iri2uri import iri2uri
@ -49,7 +74,12 @@ else:
def iri2uri(uri):
return uri
__all__ = ['Http', 'Response', 'HttpLib2Error',
def has_timeout(timeout): # python 2.6
if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT)
return (timeout is not None)
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
'debuglevel']
@ -58,6 +88,7 @@ __all__ = ['Http', 'Response', 'HttpLib2Error',
# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0
# Python 2.3 support
if sys.version_info < (2,4):
def sorted(seq):
@ -77,11 +108,20 @@ if not hasattr(httplib.HTTPResponse, 'getheaders'):
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass
class RedirectMissingLocation(HttpLib2Error): pass
class RedirectLimit(HttpLib2Error): pass
class FailedToDecompressContent(HttpLib2Error): pass
class UnimplementedDigestAuthOptionError(HttpLib2Error): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2Error): pass
# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
def __init__(self, desc, response, content):
self.response = response
self.content = content
HttpLib2Error.__init__(self, desc)
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
class RedirectLimit(HttpLib2ErrorWithResponse): pass
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass
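Because the redirect and decompression errors now subclass HttpLib2ErrorWithResponse, a caller can recover the offending response and partial body from the exception. A hedged sketch (the looping URL is a placeholder):

import httplib2

h = httplib2.Http()
try:
    resp, content = h.request('http://example.com/redirect-loop')
except httplib2.RedirectLimit, e:
    # The exception carries the last response and its body.
    print e.response.status, len(e.content)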
@ -159,11 +199,11 @@ def safename(filename):
filename = filename.encode('idna')
else:
filename = filename.encode('idna')
except:
except UnicodeError:
pass
if isinstance(filename,unicode):
filename=filename.encode('utf-8')
filemd5 = md5.new(filename).hexdigest()
filemd5 = _md5(filename).hexdigest()
filename = re_url_scheme.sub("", filename)
filename = re_slash.sub(",", filename)
@ -180,8 +220,8 @@ def _parse_cache_control(headers):
retval = {}
if headers.has_key('cache-control'):
parts = headers['cache-control'].split(',')
parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")]
parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
retval = dict(parts_with_args + parts_wo_args)
return retval
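Lower-casing the directive names (and splitting each directive on the first '=' only) makes the later has_key('max-age') lookups work however the server capitalises the header. An illustrative call against the private helper:

headers = {'cache-control': 'Max-Age=3600, No-Cache'}
print _parse_cache_control(headers)
# -> {'max-age': '3600', 'no-cache': 1} (a dict, so unordered)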
@ -275,22 +315,25 @@ def _entry_disposition(response_headers, request_headers):
if cc_response.has_key('max-age'):
try:
freshness_lifetime = int(cc_response['max-age'])
except:
except ValueError:
freshness_lifetime = 0
elif response_headers.has_key('expires'):
expires = email.Utils.parsedate_tz(response_headers['expires'])
freshness_lifetime = max(0, calendar.timegm(expires) - date)
if None == expires:
freshness_lifetime = 0
else:
freshness_lifetime = max(0, calendar.timegm(expires) - date)
else:
freshness_lifetime = 0
if cc.has_key('max-age'):
try:
freshness_lifetime = int(cc['max-age'])
except:
except ValueError:
freshness_lifetime = 0
if cc.has_key('min-fresh'):
try:
min_fresh = int(cc['min-fresh'])
except:
except ValueError:
min_fresh = 0
current_age += min_fresh
if freshness_lifetime > current_age:
@ -307,10 +350,12 @@ def _decompressContent(response, new_content):
if encoding == 'deflate':
content = zlib.decompress(content)
response['content-length'] = str(len(content))
# Record the historical presence of the encoding in a way that won't interfere.
response['-content-encoding'] = response['content-encoding']
del response['content-encoding']
except:
except IOError:
content = ""
raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'))
raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
return content
def _updateCache(request_headers, response_headers, content, cache, cachekey):
@ -339,11 +384,11 @@ def _updateCache(request_headers, response_headers, content, cache, cachekey):
cache.set(cachekey, text)
def _cnonce():
dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
return dig[:16]
def _wsse_username_token(cnonce, iso_now, password):
return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
return base64.encodestring(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
# For credentials we need two things, first
@ -354,7 +399,7 @@ def _wsse_username_token(cnonce, iso_now, password):
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
class Authentication:
class Authentication(object):
def __init__(self, credentials, host, request_uri, headers, response, content, http):
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
self.path = path
@ -405,11 +450,11 @@ class DigestAuthentication(Authentication):
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
challenge = _parse_www_authenticate(response, 'www-authenticate')
self.challenge = challenge['digest']
qop = self.challenge.get('qop')
qop = self.challenge.get('qop', 'auth')
self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
if self.challenge['qop'] is None:
raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
if self.challenge['algorithm'] != 'MD5':
raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
@ -417,7 +462,7 @@ class DigestAuthentication(Authentication):
def request(self, method, request_uri, headers, content, cnonce = None):
"""Modify the request headers"""
H = lambda x: md5.new(x).hexdigest()
H = lambda x: _md5(x).hexdigest()
KD = lambda s, d: H("%s:%s" % (s, d))
A2 = "".join([method, ":", request_uri])
self.challenge['cnonce'] = cnonce or _cnonce()
@ -477,13 +522,13 @@ class HmacDigestAuthentication(Authentication):
if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
if self.challenge['algorithm'] == 'HMAC-MD5':
self.hashmod = md5
self.hashmod = _md5
else:
self.hashmod = sha
self.hashmod = _sha
if self.challenge['pw-algorithm'] == 'MD5':
self.pwhashmod = md5
self.pwhashmod = _md5
else:
self.pwhashmod = sha
self.pwhashmod = _sha
self.key = "".join([self.credentials[0], ":",
self.pwhashmod("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
":", self.challenge['realm']
@ -545,8 +590,17 @@ class GoogleLoginAuthentication(Authentication):
def __init__(self, credentials, host, request_uri, headers, response, content, http):
from urllib import urlencode
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
challenge = _parse_www_authenticate(response, 'www-authenticate')
service = challenge['googlelogin'].get('service', 'xapi')
# Blogger actually returns the service in the challenge
# For the rest we guess based on the URI
if service == 'xapi' and request_uri.find("calendar") > 0:
service = "cl"
# No point in guessing Base or Spreadsheet
#elif request_uri.find("spreadsheets") > 0:
# service = "wise"
auth = dict(Email=credentials[0], Passwd=credentials[1], service='cl', source=headers['user-agent'])
auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
lines = content.split('\n')
d = dict([tuple(line.split("=", 1)) for line in lines if line])
@ -571,10 +625,7 @@ AUTH_SCHEME_CLASSES = {
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
def _md5(s):
return
class FileCache:
class FileCache(object):
"""Uses a local directory as a store for cached files.
Not really safe to use if multiple threads or processes are going to
be running on the same cache.
@ -589,16 +640,16 @@ class FileCache:
retval = None
cacheFullPath = os.path.join(self.cache, self.safe(key))
try:
f = file(cacheFullPath, "r")
f = file(cacheFullPath, "rb")
retval = f.read()
f.close()
except:
except IOError:
pass
return retval
def set(self, key, value):
cacheFullPath = os.path.join(self.cache, self.safe(key))
f = file(cacheFullPath, "w")
f = file(cacheFullPath, "wb")
f.write(value)
f.close()
@ -607,12 +658,131 @@ class FileCache:
if os.path.exists(cacheFullPath):
os.remove(cacheFullPath)
class Http:
"""An HTTP client that handles all
methods, caching, ETags, compression,
HTTPS, Basic, Digest, WSSE, etc.
class Credentials(object):
def __init__(self):
self.credentials = []
def add(self, name, password, domain=""):
self.credentials.append((domain.lower(), name, password))
def clear(self):
self.credentials = []
def iter(self, domain):
for (cdomain, name, password) in self.credentials:
if cdomain == "" or domain == cdomain:
yield (name, password)
class KeyCerts(Credentials):
"""Identical to Credentials except that
name/password are mapped to key/cert."""
pass
class ProxyInfo(object):
"""Collect information required to use a proxy."""
def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
"""The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
constants. For example:
p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
"""
self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass
def astuple(self):
return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
self.proxy_user, self.proxy_pass)
def isgood(self):
return socks and (self.proxy_host != None) and (self.proxy_port != None)
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
"""HTTPConnection subclass that supports timeouts"""
def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
httplib.HTTPConnection.__init__(self, host, port, strict)
self.timeout = timeout
self.proxy_info = proxy_info
def connect(self):
"""Connect to the host and port specified in __init__."""
# Mostly verbatim from httplib.py.
msg = "getaddrinfo returns an empty list"
for res in socket.getaddrinfo(self.host, self.port, 0,
socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
try:
if self.proxy_info and self.proxy_info.isgood():
self.sock = socks.socksocket(af, socktype, proto)
self.sock.setproxy(*self.proxy_info.astuple())
else:
self.sock = socket.socket(af, socktype, proto)
# Different from httplib: support timeouts.
if has_timeout(self.timeout):
self.sock.settimeout(self.timeout)
# End of difference from httplib.
if self.debuglevel > 0:
print "connect: (%s, %s)" % (self.host, self.port)
self.sock.connect(sa)
except socket.error, msg:
if self.debuglevel > 0:
print 'connect fail:', (self.host, self.port)
if self.sock:
self.sock.close()
self.sock = None
continue
break
if not self.sock:
raise socket.error, msg
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
"This class allows communication via SSL."
def __init__(self, host, port=None, key_file=None, cert_file=None,
strict=None, timeout=None, proxy_info=None):
httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
cert_file=cert_file, strict=strict)
self.timeout = timeout
self.proxy_info = proxy_info
def connect(self):
"Connect to a host on a given (SSL) port."
if self.proxy_info and self.proxy_info.isgood():
sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
sock.setproxy(*self.proxy_info.astuple())
else:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if has_timeout(self.timeout):
sock.settimeout(self.timeout)
sock.connect((self.host, self.port))
self.sock = _ssl_wrap_socket(sock, self.key_file, self.cert_file)
class Http(object):
"""An HTTP client that handles:
- all methods
- caching
- ETags
- compression
- HTTPS
- Basic
- Digest
- WSSE
and more.
"""
def __init__(self, cache=None):
def __init__(self, cache=None, timeout=None, proxy_info=None):
"""The value of proxy_info is a ProxyInfo instance.
If 'cache' is a string then it is used as a directory name
for a disk cache. Otherwise it must be an object that supports
the same interface as FileCache."""
self.proxy_info = proxy_info
# Map domain name to an httplib connection
self.connections = {}
# The location of the cache, for now a directory
@ -622,45 +792,72 @@ class Http:
else:
self.cache = cache
# tuples of name, password
self.credentials = []
# Name/password
self.credentials = Credentials()
# Key/cert
self.certificates = KeyCerts()
# authorization objects
self.authorizations = []
# If set to False then no redirects are followed, even safe ones.
self.follow_redirects = True
# Which HTTP methods do we apply optimistic concurrency to, i.e.
# which methods get an "if-match:" etag header added to them.
self.optimistic_concurrency_methods = ["PUT"]
# If 'follow_redirects' is True, and this is set to True then
# all redirects are followed, including unsafe ones.
self.follow_all_redirects = False
self.ignore_etag = False
self.force_exception_to_status_code = False
self.timeout = timeout
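Together, ProxyInfo, the *WithTimeout connection classes, and the new constructor arguments let one client carry a socket timeout and an optional proxy. A minimal sketch (proxy host and port are placeholders; the socks module is only needed for the proxy path):

import httplib2
try:
    import socks
except ImportError:
    socks = None

proxy = None
if socks:
    proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                               proxy_host='localhost', proxy_port=8000)

# Every connection this client opens gets a 10-second timeout.
h = httplib2.Http(cache='.cache', timeout=10, proxy_info=proxy)
resp, content = h.request('http://example.org/')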
def _auth_from_challenge(self, host, request_uri, headers, response, content):
"""A generator that creates Authorization objects
that can be applied to requests.
"""
challenges = _parse_www_authenticate(response, 'www-authenticate')
for cred in self.credentials:
for cred in self.credentials.iter(host):
for scheme in AUTH_SCHEME_ORDER:
if challenges.has_key(scheme):
yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
def add_credentials(self, name, password):
def add_credentials(self, name, password, domain=""):
"""Add a name and password that will be used
any time a request requires authentication."""
self.credentials.append((name, password))
self.credentials.add(name, password, domain)
def add_certificate(self, key, cert, domain):
"""Add a key and cert that will be used
any time a request requires authentication."""
self.certificates.add(key, cert, domain)
def clear_credentials(self):
"""Remove all the names and passwords
that are used for authentication"""
self.credentials = []
self.credentials.clear()
self.authorizations = []
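Credentials and client certificates can now be scoped to a domain, so one Http instance can authenticate against several hosts without sending a password to the wrong one. A short sketch (hosts and secrets are placeholders):

h = httplib2.Http()
# Sent only when example.com issues a challenge; omitting the domain
# behaves like the old API and applies to every host.
h.add_credentials('alice', 's3cret', 'example.com')
h.add_certificate('client.key', 'client.crt', 'secure.example.com')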
def _conn_request(self, conn, request_uri, method, body, headers):
for i in range(2):
try:
conn.request(method, request_uri, body, headers)
except socket.gaierror:
conn.close()
raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
except (socket.error, httplib.HTTPException):
# Just because the server closed the connection doesn't
# necessarily mean that it didn't send a response.
pass
try:
response = conn.getresponse()
except gaierror:
raise ServerNotFoundError("Unable to find the server at %s" % request_uri)
except:
except (socket.error, httplib.HTTPException):
if i == 0:
conn.close()
conn.connect()
@ -668,11 +865,13 @@ class Http:
else:
raise
else:
content = response.read()
content = ""
if method != "HEAD":
content = response.read()
response = Response(response)
content = _decompressContent(response, content)
break;
if method != "HEAD":
content = _decompressContent(response, content)
break
return (response, content)
@ -702,13 +901,13 @@ class Http:
authorization.response(response, body)
break
if (self.follow_all_redirects or method in ["GET", "HEAD"]) or response.status == 303:
if response.status in [300, 301, 302, 303, 307]:
if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
# Pick out the location header and basically start from the beginning
# remembering first to strip the ETag header and decrement our 'depth'
if redirections:
if not response.has_key('location') and response.status != 300:
raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."))
raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
# Fix-up relative redirects (which violate an RFC 2616 MUST)
if response.has_key('location'):
location = response['location']
@ -733,7 +932,7 @@ class Http:
(response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
response.previous = old_response
else:
raise RedirectLimit( _("Redirected more times than redirection_limit allows."))
raise RedirectLimit( _("Redirected more times than redirection_limit allows."), response, content)
elif response.status in [200, 203] and method == "GET":
# Don't cache 206's since we aren't going to handle byte range requests
if not response.has_key('content-location'):
@ -742,7 +941,13 @@ class Http:
return (response, content)
def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS):
# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.
def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
""" Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
@ -763,115 +968,164 @@ The return value is a tuple of (response, content), the first
being an instance of the 'Response' class, the second being
a string that contains the response entity body.
"""
if headers is None:
headers = {}
else:
headers = _normalize_headers(headers)
if not headers.has_key('user-agent'):
headers['user-agent'] = "Python-httplib2/%s" % __version__
uri = iri2uri(uri)
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
if not self.connections.has_key(scheme+":"+authority):
connection_type = (scheme == 'https') and httplib.HTTPSConnection or httplib.HTTPConnection
conn = self.connections[scheme+":"+authority] = connection_type(authority)
conn.set_debuglevel(debuglevel)
else:
conn = self.connections[scheme+":"+authority]
if method in ["GET", "HEAD"] and 'range' not in headers:
headers['accept-encoding'] = 'compress, gzip'
info = email.Message.Message()
cached_value = None
if self.cache:
cachekey = defrag_uri
cached_value = self.cache.get(cachekey)
if cached_value:
try:
info = email.message_from_string(cached_value)
content = cached_value.split('\r\n\r\n', 1)[1]
except Exception, e:
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None
if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
# http://www.w3.org/1999/04/Editing/
headers['if-match'] = info['etag']
if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
self.cache.delete(cachekey)
if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
if info.has_key('-x-permanent-redirect-url'):
# Should cached permanent redirects be counted in our redirection count? For now, yes.
(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
response.previous = Response(info)
response.previous.fromcache = True
try:
if headers is None:
headers = {}
else:
# Determine our course of action:
# Is the cached entry fresh or stale?
# Has the client requested a non-cached response?
#
# There seems to be three possible answers:
# 1. [FRESH] Return the cache entry w/o doing a GET
# 2. [STALE] Do the GET (but add in cache validators if available)
# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
entry_disposition = _entry_disposition(info, headers)
headers = _normalize_headers(headers)
if entry_disposition == "FRESH":
if not cached_value:
info['status'] = '504'
content = ""
response = Response(info)
if cached_value:
response.fromcache = True
return (response, content)
if not headers.has_key('user-agent'):
headers['user-agent'] = "Python-httplib2/%s" % __version__
if entry_disposition == "STALE":
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
headers['if-none-match'] = info['etag']
if info.has_key('last-modified') and not 'last-modified' in headers:
headers['if-modified-since'] = info['last-modified']
elif entry_disposition == "TRANSPARENT":
pass
uri = iri2uri(uri)
(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
domain_port = authority.split(":")[0:2]
if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
scheme = 'https'
authority = domain_port[0]
if response.status == 304 and method == "GET":
# Rewrite the cache entry with the new end-to-end headers
# Take all headers that are in response
# and overwrite their values in info.
# unless they are hop-by-hop, or are listed in the connection header.
for key in _get_end2end_headers(response):
info[key] = response[key]
merged_response = Response(info)
if hasattr(response, "_stale_digest"):
merged_response._stale_digest = response._stale_digest
try:
_updateCache(headers, merged_response, content, self.cache, cachekey)
except:
print locals()
raise
response = merged_response
response.status = 200
response.fromcache = True
elif response.status == 200:
content = new_content
conn_key = scheme+":"+authority
if conn_key in self.connections:
conn = self.connections[conn_key]
else:
if not connection_type:
connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
certs = list(self.certificates.iter(authority))
if scheme == 'https' and certs:
conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
else:
conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
conn.set_debuglevel(debuglevel)
if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
headers['accept-encoding'] = 'deflate, gzip'
info = email.Message.Message()
cached_value = None
if self.cache:
cachekey = defrag_uri
cached_value = self.cache.get(cachekey)
if cached_value:
# info = email.message_from_string(cached_value)
#
# Need to replace the line above with the kludge below
# to work around a parsing bug that was reported but never
# fixed: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
try:
info, content = cached_value.split('\r\n\r\n', 1)
feedparser = email.FeedParser.FeedParser()
feedparser.feed(info)
info = feedparser.close()
feedparser._parse = None
except IndexError:
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None
if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
# http://www.w3.org/1999/04/Editing/
headers['if-match'] = info['etag']
if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
self.cache.delete(cachekey)
content = new_content
else:
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
if info.has_key('-x-permanent-redirect-url'):
# Should cached permanent redirects be counted in our redirection count? For now, yes.
(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
response.previous = Response(info)
response.previous.fromcache = True
else:
# Determine our course of action:
# Is the cached entry fresh or stale?
# Has the client requested a non-cached response?
#
# There seems to be three possible answers:
# 1. [FRESH] Return the cache entry w/o doing a GET
# 2. [STALE] Do the GET (but add in cache validators if available)
# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
entry_disposition = _entry_disposition(info, headers)
if entry_disposition == "FRESH":
if not cached_value:
info['status'] = '504'
content = ""
response = Response(info)
if cached_value:
response.fromcache = True
return (response, content)
if entry_disposition == "STALE":
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
headers['if-none-match'] = info['etag']
if info.has_key('last-modified') and not 'last-modified' in headers:
headers['if-modified-since'] = info['last-modified']
elif entry_disposition == "TRANSPARENT":
pass
(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
if response.status == 304 and method == "GET":
# Rewrite the cache entry with the new end-to-end headers
# Take all headers that are in response
# and overwrite their values in info.
# unless they are hop-by-hop, or are listed in the connection header.
for key in _get_end2end_headers(response):
info[key] = response[key]
merged_response = Response(info)
if hasattr(response, "_stale_digest"):
merged_response._stale_digest = response._stale_digest
_updateCache(headers, merged_response, content, self.cache, cachekey)
response = merged_response
response.status = 200
response.fromcache = True
elif response.status == 200:
content = new_content
else:
self.cache.delete(cachekey)
content = new_content
else:
cc = _parse_cache_control(headers)
if cc.has_key('only-if-cached'):
info['status'] = '504'
response = Response(info)
content = ""
else:
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
except Exception, e:
if self.force_exception_to_status_code:
if isinstance(e, HttpLib2ErrorWithResponse):
response = e.response
content = e.content
response.status = 500
response.reason = str(e)
elif isinstance(e, socket.timeout):
content = "Request Timeout"
response = Response( {
"content-type": "text/plain",
"status": "408",
"content-length": len(content)
})
response.reason = "Request Timeout"
else:
content = str(e)
response = Response( {
"content-type": "text/plain",
"status": "400",
"content-length": len(content)
})
response.reason = "Bad Request"
else:
raise
return (response, content)
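The force_exception_to_status_code flag inverts the handler above: instead of raising, request() synthesises a response describing the failure (500 for errors that carry a response, 408 for timeouts, 400 otherwise). A hedged sketch:

h = httplib2.Http()
h.force_exception_to_status_code = True

# An unresolvable host no longer raises; it yields a synthetic response.
resp, content = h.request('http://no.such.host.invalid/')
print resp.status, resp.reason   # 400 Bad Request; content is the error text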
@ -898,7 +1152,7 @@ class Response(dict):
# an httplib.HTTPResponse object.
if isinstance(info, httplib.HTTPResponse):
for key, value in info.getheaders():
self[key] = value
self[key.lower()] = value
self.status = info.status
self['status'] = str(self.status)
self.reason = info.reason
@ -907,11 +1161,14 @@ class Response(dict):
for key, value in info.items():
self[key] = value
self.status = int(self['status'])
else:
for key, value in info.iteritems():
self[key] = value
self.status = int(self.get('status', self.status))
def __getattr__(self, name):
if name == 'dict':
return self
else:
raise AttributeError, name
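Response is a dict subclass, so the header lower-casing above makes lookups predictable, and the convenience attributes coexist with item access. For example:

r = httplib2.Response({'status': '200', 'content-type': 'text/plain'})
print r.status            # 200, as an int attribute
print r['content-type']   # 'text/plain'
print r.dict is r         # True, via __getattr__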