Remove deprecation warnings (and update httplib2)

Sam Ruby 2009-09-09 09:20:15 -04:00
parent 51e17650df
commit 63fa05e556
3 changed files with 441 additions and 175 deletions
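Every file gets the same compatibility shim: prefer hashlib (new in Python 2.5) and fall back to the md5/sha modules that Python 2.6 deprecates. A minimal sketch of the pattern applied throughout this commit:

# Prefer hashlib (Python 2.5+); fall back to the modules that
# trigger DeprecationWarning on Python 2.6.
try:
    from hashlib import md5, sha1 as sha
except ImportError:
    from md5 import new as md5
    from sha import new as sha

digest = md5('any string').hexdigest()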

View File

@ -13,13 +13,18 @@ well formed XHTML.
Todo:
* extension elements
"""
import re, time, md5, sgmllib
import re, time, sgmllib
from xml.sax.saxutils import escape
from xml.dom import minidom, Node
from html5lib import liberalxmlparser
from html5lib.treebuilders import dom
import planet, config
try:
from hashlib import md5
except ImportError:
from md5 import new as md5
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
def createTextElement(parent, name, value):
@ -68,14 +73,14 @@ def id(xentry, entry):
entry_id = entry.link
elif entry.has_key("title") and entry.title:
entry_id = (entry.title_detail.base + "/" +
md5.new(entry.title).hexdigest())
md5(entry.title).hexdigest())
elif entry.has_key("summary") and entry.summary:
entry_id = (entry.summary_detail.base + "/" +
md5.new(entry.summary).hexdigest())
md5(entry.summary).hexdigest())
elif entry.has_key("content") and entry.content:
entry_id = (entry.content[0].base + "/" +
md5.new(entry.content[0].value).hexdigest())
md5(entry.content[0].value).hexdigest())
else:
return

View File

@ -10,6 +10,11 @@ from xml.dom import minidom
import planet, config, feedparser, reconstitute, shell, socket, scrub
from StringIO import StringIO
try:
from hashlib import md5
except ImportError:
from md5 import new as md5
# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
re_slash = re.compile(r'[?/:|]+')
@ -44,9 +49,8 @@ def filename(directory, filename):
parts=filename.split(',')
for i in range(len(parts),0,-1):
if len(','.join(parts[:i])) < 220:
import md5
filename = ','.join(parts[:i]) + ',' + \
md5.new(','.join(parts[i:])).hexdigest()
md5(','.join(parts[i:])).hexdigest()
break
return os.path.join(directory, filename)
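The loop above keeps as many comma-separated parts of the name as fit in 220 characters and replaces the remainder with an MD5 digest, so cache filenames stay within filesystem limits. A standalone sketch of the same idea (the helper name shorten is hypothetical):

try:
    from hashlib import md5
except ImportError:
    from md5 import new as md5

def shorten(name, limit=220):
    # Hypothetical standalone version of the loop above; assumes the
    # caller has already decided the name is too long.
    parts = name.split(',')
    for i in range(len(parts), 0, -1):
        if len(','.join(parts[:i])) < limit:
            return ','.join(parts[:i]) + ',' + md5(','.join(parts[i:])).hexdigest()
    return name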
@ -277,7 +281,7 @@ def writeCache(feed_uri, feed_info, data):
xdoc.unlink()
def httpThread(thread_index, input_queue, output_queue, log):
import httplib2, md5
import httplib2
from httplib import BadStatusLine
h = httplib2.Http(config.http_cache_directory())
@ -312,7 +316,7 @@ def httpThread(thread_index, input_queue, output_queue, log):
(resp, content) = h.request(idna, 'GET', headers=headers)
# unchanged detection
resp['-content-hash'] = md5.new(content or '').hexdigest()
resp['-content-hash'] = md5(content or '').hexdigest()
if resp.status == 200:
if resp.fromcache:
resp.status = 304
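The '-content-hash' pseudo-header records an MD5 of the body so the spider can detect unchanged feeds even when the server ignores conditional GET. A sketch of the idea (previous_hash, the digest stored with the cached copy, is hypothetical here; the actual comparison happens elsewhere in the spider):

new_hash = md5(content or '').hexdigest()
if new_hash == previous_hash:
    resp.status = 304  # identical body: treat like Not Modified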

View File

@ -7,6 +7,9 @@ to conserve bandwidth.
Requires Python 2.3 or later
Changelog:
2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
"""
__author__ = "Joe Gregorio (joe@bitworking.org)"
@ -19,14 +22,14 @@ __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
"Sam Ruby",
"Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "$Rev: 227 $"
__version__ = "$Rev$"
import re
import sys
import md5
import email
import email.Utils
import email.Message
import email.FeedParser
import StringIO
import gzip
import zlib
@ -38,10 +41,32 @@ import copy
import calendar
import time
import random
import sha
# remove deprecation warning in python2.6
try:
from hashlib import sha1 as _sha, md5 as _md5
except ImportError:
import sha
import md5
_sha = sha.new
_md5 = md5.new
import hmac
from gettext import gettext as _
from socket import gaierror
import socket
try:
import socks
except ImportError:
socks = None
# Build the appropriate socket wrapper for ssl
try:
import ssl # python 2.6
_ssl_wrap_socket = ssl.wrap_socket
except ImportError:
def _ssl_wrap_socket(sock, key_file, cert_file):
ssl_sock = socket.ssl(sock, key_file, cert_file)
return httplib.FakeSocket(sock, ssl_sock)
if sys.version_info >= (2,3):
from iri2uri import iri2uri
@ -49,7 +74,12 @@ else:
def iri2uri(uri):
return uri
__all__ = ['Http', 'Response', 'HttpLib2Error',
def has_timeout(timeout): # python 2.6
if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT)
return (timeout is not None)
__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
'debuglevel']
@ -58,6 +88,7 @@ __all__ = ['Http', 'Response', 'HttpLib2Error',
# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0
# Python 2.3 support
if sys.version_info < (2,4):
def sorted(seq):
@ -77,11 +108,20 @@ if not hasattr(httplib.HTTPResponse, 'getheaders'):
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass
class RedirectMissingLocation(HttpLib2Error): pass
class RedirectLimit(HttpLib2Error): pass
class FailedToDecompressContent(HttpLib2Error): pass
class UnimplementedDigestAuthOptionError(HttpLib2Error): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2Error): pass
# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
def __init__(self, desc, response, content):
self.response = response
self.content = content
HttpLib2Error.__init__(self, desc)
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
class RedirectLimit(HttpLib2ErrorWithResponse): pass
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass
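Because the redirect and decompression errors now subclass HttpLib2ErrorWithResponse, a caller can recover the offending response and partial body from the exception. A hedged sketch (the looping URL is a placeholder):

import httplib2

h = httplib2.Http()
try:
    resp, content = h.request('http://example.com/redirect-loop')
except httplib2.RedirectLimit, e:
    # The exception carries the last response and its body.
    print e.response.status, len(e.content)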
@ -159,11 +199,11 @@ def safename(filename):
filename = filename.encode('idna')
else:
filename = filename.encode('idna')
except:
except UnicodeError:
pass
if isinstance(filename,unicode):
filename=filename.encode('utf-8')
filemd5 = md5.new(filename).hexdigest()
filemd5 = _md5(filename).hexdigest()
filename = re_url_scheme.sub("", filename)
filename = re_slash.sub(",", filename)
@ -180,8 +220,8 @@ def _parse_cache_control(headers):
retval = {}
if headers.has_key('cache-control'):
parts = headers['cache-control'].split(',')
parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")]
parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
retval = dict(parts_with_args + parts_wo_args)
return retval
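Lower-casing the directive names (and splitting each directive on the first '=' only) makes the later has_key('max-age') lookups work however the server capitalises the header. An illustrative call against the private helper:

headers = {'cache-control': 'Max-Age=3600, No-Cache'}
print _parse_cache_control(headers)
# -> {'max-age': '3600', 'no-cache': 1} (a dict, so unordered)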
@ -275,22 +315,25 @@ def _entry_disposition(response_headers, request_headers):
if cc_response.has_key('max-age'):
try:
freshness_lifetime = int(cc_response['max-age'])
except:
except ValueError:
freshness_lifetime = 0
elif response_headers.has_key('expires'):
expires = email.Utils.parsedate_tz(response_headers['expires'])
freshness_lifetime = max(0, calendar.timegm(expires) - date)
if None == expires:
freshness_lifetime = 0
else:
freshness_lifetime = max(0, calendar.timegm(expires) - date)
else:
freshness_lifetime = 0
if cc.has_key('max-age'):
try:
freshness_lifetime = int(cc['max-age'])
except:
except ValueError:
freshness_lifetime = 0
if cc.has_key('min-fresh'):
try:
min_fresh = int(cc['min-fresh'])
except:
except ValueError:
min_fresh = 0
current_age += min_fresh
if freshness_lifetime > current_age:
@ -307,10 +350,12 @@ def _decompressContent(response, new_content):
if encoding == 'deflate':
content = zlib.decompress(content)
response['content-length'] = str(len(content))
# Record the historical presence of the encoding in a way that won't interfere.
response['-content-encoding'] = response['content-encoding']
del response['content-encoding']
except:
except IOError:
content = ""
raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'))
raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
return content
def _updateCache(request_headers, response_headers, content, cache, cachekey):
@ -339,11 +384,11 @@ def _updateCache(request_headers, response_headers, content, cache, cachekey):
cache.set(cachekey, text)
def _cnonce():
dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
return dig[:16]
def _wsse_username_token(cnonce, iso_now, password):
return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
return base64.encodestring(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
# For credentials we need two things, first
@ -354,7 +399,7 @@ def _wsse_username_token(cnonce, iso_now, password):
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
class Authentication:
class Authentication(object):
def __init__(self, credentials, host, request_uri, headers, response, content, http):
(scheme, authority, path, query, fragment) = parse_uri(request_uri)
self.path = path
@ -405,11 +450,11 @@ class DigestAuthentication(Authentication):
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
challenge = _parse_www_authenticate(response, 'www-authenticate')
self.challenge = challenge['digest']
qop = self.challenge.get('qop')
qop = self.challenge.get('qop', 'auth')
self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
if self.challenge['qop'] is None:
raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
if self.challenge['algorithm'] != 'MD5':
raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
@ -417,7 +462,7 @@ class DigestAuthentication(Authentication):
def request(self, method, request_uri, headers, content, cnonce = None):
"""Modify the request headers"""
H = lambda x: md5.new(x).hexdigest()
H = lambda x: _md5(x).hexdigest()
KD = lambda s, d: H("%s:%s" % (s, d))
A2 = "".join([method, ":", request_uri])
self.challenge['cnonce'] = cnonce or _cnonce()
@ -477,13 +522,13 @@ class HmacDigestAuthentication(Authentication):
if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
if self.challenge['algorithm'] == 'HMAC-MD5':
self.hashmod = md5
self.hashmod = _md5
else:
self.hashmod = sha
self.hashmod = _sha
if self.challenge['pw-algorithm'] == 'MD5':
self.pwhashmod = md5
self.pwhashmod = _md5
else:
self.pwhashmod = sha
self.pwhashmod = _sha
self.key = "".join([self.credentials[0], ":",
self.pwhashmod("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
":", self.challenge['realm']
@ -545,8 +590,17 @@ class GoogleLoginAuthentication(Authentication):
def __init__(self, credentials, host, request_uri, headers, response, content, http):
from urllib import urlencode
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
challenge = _parse_www_authenticate(response, 'www-authenticate')
service = challenge['googlelogin'].get('service', 'xapi')
# Blogger actually returns the service in the challenge
# For the rest we guess based on the URI
if service == 'xapi' and request_uri.find("calendar") > 0:
service = "cl"
# No point in guessing Base or Spreadsheet
#elif request_uri.find("spreadsheets") > 0:
# service = "wise"
auth = dict(Email=credentials[0], Passwd=credentials[1], service='cl', source=headers['user-agent'])
auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
lines = content.split('\n')
d = dict([tuple(line.split("=", 1)) for line in lines if line])
@ -571,10 +625,7 @@ AUTH_SCHEME_CLASSES = {
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
def _md5(s):
return
class FileCache:
class FileCache(object):
"""Uses a local directory as a store for cached files.
Not really safe to use if multiple threads or processes are going to
be running on the same cache.
@ -589,16 +640,16 @@ class FileCache:
retval = None
cacheFullPath = os.path.join(self.cache, self.safe(key))
try:
f = file(cacheFullPath, "r")
f = file(cacheFullPath, "rb")
retval = f.read()
f.close()
except:
except IOError:
pass
return retval
def set(self, key, value):
cacheFullPath = os.path.join(self.cache, self.safe(key))
f = file(cacheFullPath, "w")
f = file(cacheFullPath, "wb")
f.write(value)
f.close()
@ -607,12 +658,131 @@ class FileCache:
if os.path.exists(cacheFullPath):
os.remove(cacheFullPath)
class Http:
"""An HTTP client that handles all
methods, caching, ETags, compression,
HTTPS, Basic, Digest, WSSE, etc.
class Credentials(object):
def __init__(self):
self.credentials = []
def add(self, name, password, domain=""):
self.credentials.append((domain.lower(), name, password))
def clear(self):
self.credentials = []
def iter(self, domain):
for (cdomain, name, password) in self.credentials:
if cdomain == "" or domain == cdomain:
yield (name, password)
class KeyCerts(Credentials):
"""Identical to Credentials except that
name/password are mapped to key/cert."""
pass
class ProxyInfo(object):
"""Collect information required to use a proxy."""
def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
"""The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
constants. For example:
p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
"""
self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass
def astuple(self):
return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
self.proxy_user, self.proxy_pass)
def isgood(self):
return socks and (self.proxy_host != None) and (self.proxy_port != None)
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
"""HTTPConnection subclass that supports timeouts"""
def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
httplib.HTTPConnection.__init__(self, host, port, strict)
self.timeout = timeout
self.proxy_info = proxy_info
def connect(self):
"""Connect to the host and port specified in __init__."""
# Mostly verbatim from httplib.py.
msg = "getaddrinfo returns an empty list"
for res in socket.getaddrinfo(self.host, self.port, 0,
socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
try:
if self.proxy_info and self.proxy_info.isgood():
self.sock = socks.socksocket(af, socktype, proto)
self.sock.setproxy(*self.proxy_info.astuple())
else:
self.sock = socket.socket(af, socktype, proto)
# Different from httplib: support timeouts.
if has_timeout(self.timeout):
self.sock.settimeout(self.timeout)
# End of difference from httplib.
if self.debuglevel > 0:
print "connect: (%s, %s)" % (self.host, self.port)
self.sock.connect(sa)
except socket.error, msg:
if self.debuglevel > 0:
print 'connect fail:', (self.host, self.port)
if self.sock:
self.sock.close()
self.sock = None
continue
break
if not self.sock:
raise socket.error, msg
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
"This class allows communication via SSL."
def __init__(self, host, port=None, key_file=None, cert_file=None,
strict=None, timeout=None, proxy_info=None):
httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
cert_file=cert_file, strict=strict)
self.timeout = timeout
self.proxy_info = proxy_info
def connect(self):
"Connect to a host on a given (SSL) port."
if self.proxy_info and self.proxy_info.isgood():
sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
sock.setproxy(*self.proxy_info.astuple())
else:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if has_timeout(self.timeout):
sock.settimeout(self.timeout)
sock.connect((self.host, self.port))
self.sock = _ssl_wrap_socket(sock, self.key_file, self.cert_file)
class Http(object):
"""An HTTP client that handles:
- all methods
- caching
- ETags
- compression
- HTTPS
- Basic
- Digest
- WSSE
and more.
"""
def __init__(self, cache=None):
def __init__(self, cache=None, timeout=None, proxy_info=None):
"""The value of proxy_info is a ProxyInfo instance.
If 'cache' is a string then it is used as a directory name
for a disk cache. Otherwise it must be an object that supports
the same interface as FileCache."""
self.proxy_info = proxy_info
# Map domain name to an httplib connection
self.connections = {}
# The location of the cache, for now a directory
@ -622,45 +792,72 @@ class Http:
else:
self.cache = cache
# tuples of name, password
self.credentials = []
# Name/password
self.credentials = Credentials()
# Key/cert
self.certificates = KeyCerts()
# authorization objects
self.authorizations = []
# If set to False then no redirects are followed, even safe ones.
self.follow_redirects = True
# Which HTTP methods do we apply optimistic concurrency to, i.e.
# which methods get an "if-match:" etag header added to them.
self.optimistic_concurrency_methods = ["PUT"]
# If 'follow_redirects' is True, and this is set to True then
# all redirects are followed, including unsafe ones.
self.follow_all_redirects = False
self.ignore_etag = False
self.force_exception_to_status_code = False
self.timeout = timeout
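Together, ProxyInfo, the *WithTimeout connection classes, and the new constructor arguments let one client carry a socket timeout and an optional proxy. A minimal sketch (proxy host and port are placeholders; the socks module is only needed for the proxy path):

import httplib2
try:
    import socks
except ImportError:
    socks = None

proxy = None
if socks:
    proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                               proxy_host='localhost', proxy_port=8000)

# Every connection this client opens gets a 10-second timeout.
h = httplib2.Http(cache='.cache', timeout=10, proxy_info=proxy)
resp, content = h.request('http://example.org/')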
def _auth_from_challenge(self, host, request_uri, headers, response, content):
"""A generator that creates Authorization objects
that can be applied to requests.
"""
challenges = _parse_www_authenticate(response, 'www-authenticate')
for cred in self.credentials:
for cred in self.credentials.iter(host):
for scheme in AUTH_SCHEME_ORDER:
if challenges.has_key(scheme):
yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
def add_credentials(self, name, password):
def add_credentials(self, name, password, domain=""):
"""Add a name and password that will be used
any time a request requires authentication."""
self.credentials.append((name, password))
self.credentials.add(name, password, domain)
def add_certificate(self, key, cert, domain):
"""Add a key and cert that will be used
any time a request requires authentication."""
self.certificates.add(key, cert, domain)
def clear_credentials(self):
"""Remove all the names and passwords
that are used for authentication"""
self.credentials = []
self.credentials.clear()
self.authorizations = []
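Credentials and client certificates can now be scoped to a domain, so one Http instance can authenticate against several hosts without sending a password to the wrong one. A short sketch (hosts and secrets are placeholders):

h = httplib2.Http()
# Sent only when example.com issues a challenge; omitting the domain
# behaves like the old API and applies to every host.
h.add_credentials('alice', 's3cret', 'example.com')
h.add_certificate('client.key', 'client.crt', 'secure.example.com')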
def _conn_request(self, conn, request_uri, method, body, headers):
for i in range(2):
try:
conn.request(method, request_uri, body, headers)
except socket.gaierror:
conn.close()
raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
except (socket.error, httplib.HTTPException):
# Just because the server closed the connection doesn't
# necessarily mean that it didn't send a response.
pass
try:
response = conn.getresponse()
except gaierror:
raise ServerNotFoundError("Unable to find the server at %s" % request_uri)
except:
except (socket.error, httplib.HTTPException):
if i == 0:
conn.close()
conn.connect()
@ -668,11 +865,13 @@ class Http:
else:
raise
else:
content = response.read()
content = ""
if method != "HEAD":
content = response.read()
response = Response(response)
content = _decompressContent(response, content)
break;
if method != "HEAD":
content = _decompressContent(response, content)
break
return (response, content)
@ -702,13 +901,13 @@ class Http:
authorization.response(response, body)
break
if (self.follow_all_redirects or method in ["GET", "HEAD"]) or response.status == 303:
if response.status in [300, 301, 302, 303, 307]:
if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
# Pick out the location header and basically start from the beginning
# remembering first to strip the ETag header and decrement our 'depth'
if redirections:
if not response.has_key('location') and response.status != 300:
raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."))
raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
# Fix-up relative redirects (which violate an RFC 2616 MUST)
if response.has_key('location'):
location = response['location']
@ -733,7 +932,7 @@ class Http:
(response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
response.previous = old_response
else:
raise RedirectLimit( _("Redirected more times than redirection_limit allows."))
raise RedirectLimit( _("Redirected more times than redirection_limit allows."), response, content)
elif response.status in [200, 203] and method == "GET":
# Don't cache 206's since we aren't going to handle byte range requests
if not response.has_key('content-location'):
@ -742,7 +941,13 @@ class Http:
return (response, content)
def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS):
# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.
def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
""" Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
@ -763,115 +968,164 @@ The return value is a tuple of (response, content), the first
being an instance of the 'Response' class, the second being
a string that contains the response entity body.
"""
if headers is None:
headers = {}
else:
headers = _normalize_headers(headers)
if not headers.has_key('user-agent'):
headers['user-agent'] = "Python-httplib2/%s" % __version__
uri = iri2uri(uri)
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
if not self.connections.has_key(scheme+":"+authority):
connection_type = (scheme == 'https') and httplib.HTTPSConnection or httplib.HTTPConnection
conn = self.connections[scheme+":"+authority] = connection_type(authority)
conn.set_debuglevel(debuglevel)
else:
conn = self.connections[scheme+":"+authority]
if method in ["GET", "HEAD"] and 'range' not in headers:
headers['accept-encoding'] = 'compress, gzip'
info = email.Message.Message()
cached_value = None
if self.cache:
cachekey = defrag_uri
cached_value = self.cache.get(cachekey)
if cached_value:
try:
info = email.message_from_string(cached_value)
content = cached_value.split('\r\n\r\n', 1)[1]
except Exception, e:
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None
if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
# http://www.w3.org/1999/04/Editing/
headers['if-match'] = info['etag']
if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
self.cache.delete(cachekey)
if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
if info.has_key('-x-permanent-redirect-url'):
# Should cached permanent redirects be counted in our redirection count? For now, yes.
(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
response.previous = Response(info)
response.previous.fromcache = True
try:
if headers is None:
headers = {}
else:
# Determine our course of action:
# Is the cached entry fresh or stale?
# Has the client requested a non-cached response?
#
# There seems to be three possible answers:
# 1. [FRESH] Return the cache entry w/o doing a GET
# 2. [STALE] Do the GET (but add in cache validators if available)
# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
entry_disposition = _entry_disposition(info, headers)
headers = _normalize_headers(headers)
if entry_disposition == "FRESH":
if not cached_value:
info['status'] = '504'
content = ""
response = Response(info)
if cached_value:
response.fromcache = True
return (response, content)
if not headers.has_key('user-agent'):
headers['user-agent'] = "Python-httplib2/%s" % __version__
if entry_disposition == "STALE":
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
headers['if-none-match'] = info['etag']
if info.has_key('last-modified') and not 'last-modified' in headers:
headers['if-modified-since'] = info['last-modified']
elif entry_disposition == "TRANSPARENT":
pass
uri = iri2uri(uri)
(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
domain_port = authority.split(":")[0:2]
if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
scheme = 'https'
authority = domain_port[0]
if response.status == 304 and method == "GET":
# Rewrite the cache entry with the new end-to-end headers
# Take all headers that are in response
# and overwrite their values in info.
# unless they are hop-by-hop, or are listed in the connection header.
for key in _get_end2end_headers(response):
info[key] = response[key]
merged_response = Response(info)
if hasattr(response, "_stale_digest"):
merged_response._stale_digest = response._stale_digest
try:
_updateCache(headers, merged_response, content, self.cache, cachekey)
except:
print locals()
raise
response = merged_response
response.status = 200
response.fromcache = True
elif response.status == 200:
content = new_content
conn_key = scheme+":"+authority
if conn_key in self.connections:
conn = self.connections[conn_key]
else:
if not connection_type:
connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
certs = list(self.certificates.iter(authority))
if scheme == 'https' and certs:
conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
else:
conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
conn.set_debuglevel(debuglevel)
if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
headers['accept-encoding'] = 'deflate, gzip'
info = email.Message.Message()
cached_value = None
if self.cache:
cachekey = defrag_uri
cached_value = self.cache.get(cachekey)
if cached_value:
# info = email.message_from_string(cached_value)
#
# Need to replace the line above with the kludge below
# to work around a parsing bug that was reported but never
# fixed: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
try:
info, content = cached_value.split('\r\n\r\n', 1)
feedparser = email.FeedParser.FeedParser()
feedparser.feed(info)
info = feedparser.close()
feedparser._parse = None
except IndexError:
self.cache.delete(cachekey)
cachekey = None
cached_value = None
else:
cachekey = None
if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
# http://www.w3.org/1999/04/Editing/
headers['if-match'] = info['etag']
if method not in ["GET", "HEAD"] and self.cache and cachekey:
# RFC 2616 Section 13.10
self.cache.delete(cachekey)
content = new_content
else:
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
if info.has_key('-x-permanent-redirect-url'):
# Should cached permanent redirects be counted in our redirection count? For now, yes.
(response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
response.previous = Response(info)
response.previous.fromcache = True
else:
# Determine our course of action:
# Is the cached entry fresh or stale?
# Has the client requested a non-cached response?
#
# There seems to be three possible answers:
# 1. [FRESH] Return the cache entry w/o doing a GET
# 2. [STALE] Do the GET (but add in cache validators if available)
# 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
entry_disposition = _entry_disposition(info, headers)
if entry_disposition == "FRESH":
if not cached_value:
info['status'] = '504'
content = ""
response = Response(info)
if cached_value:
response.fromcache = True
return (response, content)
if entry_disposition == "STALE":
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
headers['if-none-match'] = info['etag']
if info.has_key('last-modified') and not 'last-modified' in headers:
headers['if-modified-since'] = info['last-modified']
elif entry_disposition == "TRANSPARENT":
pass
(response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
if response.status == 304 and method == "GET":
# Rewrite the cache entry with the new end-to-end headers
# Take all headers that are in response
# and overwrite their values in info.
# unless they are hop-by-hop, or are listed in the connection header.
for key in _get_end2end_headers(response):
info[key] = response[key]
merged_response = Response(info)
if hasattr(response, "_stale_digest"):
merged_response._stale_digest = response._stale_digest
_updateCache(headers, merged_response, content, self.cache, cachekey)
response = merged_response
response.status = 200
response.fromcache = True
elif response.status == 200:
content = new_content
else:
self.cache.delete(cachekey)
content = new_content
else:
cc = _parse_cache_control(headers)
if cc.has_key('only-if-cached'):
info['status'] = '504'
response = Response(info)
content = ""
else:
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
except Exception, e:
if self.force_exception_to_status_code:
if isinstance(e, HttpLib2ErrorWithResponse):
response = e.response
content = e.content
response.status = 500
response.reason = str(e)
elif isinstance(e, socket.timeout):
content = "Request Timeout"
response = Response( {
"content-type": "text/plain",
"status": "408",
"content-length": len(content)
})
response.reason = "Request Timeout"
else:
content = str(e)
response = Response( {
"content-type": "text/plain",
"status": "400",
"content-length": len(content)
})
response.reason = "Bad Request"
else:
raise
return (response, content)
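The force_exception_to_status_code flag inverts the handler above: instead of raising, request() synthesises a response describing the failure (500 for errors that carry a response, 408 for timeouts, 400 otherwise). A hedged sketch:

h = httplib2.Http()
h.force_exception_to_status_code = True

# An unresolvable host no longer raises; it yields a synthetic response.
resp, content = h.request('http://no.such.host.invalid/')
print resp.status, resp.reason   # 400 Bad Request; content is the error text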
@ -898,7 +1152,7 @@ class Response(dict):
# an httplib.HTTPResponse object.
if isinstance(info, httplib.HTTPResponse):
for key, value in info.getheaders():
self[key] = value
self[key.lower()] = value
self.status = info.status
self['status'] = str(self.status)
self.reason = info.reason
@ -907,11 +1161,14 @@ class Response(dict):
for key, value in info.items():
self[key] = value
self.status = int(self['status'])
else:
for key, value in info.iteritems():
self[key] = value
self.status = int(self.get('status', self.status))
def __getattr__(self, name):
if name == 'dict':
return self
else:
raise AttributeError, name
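Response is a dict subclass, so the header lower-casing above makes lookups predictable, and the convenience attributes coexist with item access. For example:

r = httplib2.Response({'status': '200', 'content-type': 'text/plain'})
print r.status            # 200, as an int attribute
print r['content-type']   # 'text/plain'
print r.dict is r         # True, via __getattr__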