Remove deprecation warnings (and update httplib2)
parent 51e17650df
commit 63fa05e556
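The deprecation warnings come from Python 2.6, where the standalone md5 and sha modules (and md5.new()/sha.new()) are deprecated in favour of hashlib. The fix applied across the planet modules and the vendored httplib2 is the usual compatibility shim; a minimal sketch of the pattern (illustrative, not a line-for-line copy of the patch):

    # Prefer hashlib (Python 2.5+); fall back to the old modules on 2.4 and earlier.
    try:
        from hashlib import md5, sha1
    except ImportError:
        from md5 import new as md5
        from sha import new as sha1

    digest = md5('some bytes').hexdigest()   # no DeprecationWarning on 2.6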
@@ -13,13 +13,18 @@ well formed XHTML.
Todo:
  * extension elements
"""
import re, time, md5, sgmllib
import re, time, sgmllib
from xml.sax.saxutils import escape
from xml.dom import minidom, Node
from html5lib import liberalxmlparser
from html5lib.treebuilders import dom
import planet, config

try:
    from hashlib import md5
except:
    from md5 import new as md5

illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")

def createTextElement(parent, name, value):
@@ -68,14 +73,14 @@ def id(xentry, entry):
        entry_id = entry.link
    elif entry.has_key("title") and entry.title:
        entry_id = (entry.title_detail.base + "/" +
            md5.new(entry.title).hexdigest())
            md5(entry.title).hexdigest())
    elif entry.has_key("summary") and entry.summary:
        entry_id = (entry.summary_detail.base + "/" +
            md5.new(entry.summary).hexdigest())
            md5(entry.summary).hexdigest())
    elif entry.has_key("content") and entry.content:

        entry_id = (entry.content[0].base + "/" +
            md5.new(entry.content[0].value).hexdigest())
            md5(entry.content[0].value).hexdigest())
    else:
        return
@@ -10,6 +10,11 @@ from xml.dom import minidom
import planet, config, feedparser, reconstitute, shell, socket, scrub
from StringIO import StringIO

try:
    from hashlib import md5
except:
    from md5 import new as md5

# Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
re_slash = re.compile(r'[?/:|]+')
@@ -44,9 +49,8 @@ def filename(directory, filename):
        parts=filename.split(',')
        for i in range(len(parts),0,-1):
            if len(','.join(parts[:i])) < 220:
                import md5
                filename = ','.join(parts[:i]) + ',' + \
                    md5.new(','.join(parts[i:])).hexdigest()
                    md5(','.join(parts[i:])).hexdigest()
                break

    return os.path.join(directory, filename)
@@ -277,7 +281,7 @@ def writeCache(feed_uri, feed_info, data):
    xdoc.unlink()

def httpThread(thread_index, input_queue, output_queue, log):
    import httplib2, md5
    import httplib2
    from httplib import BadStatusLine

    h = httplib2.Http(config.http_cache_directory())
@@ -312,7 +316,7 @@ def httpThread(thread_index, input_queue, output_queue, log):
            (resp, content) = h.request(idna, 'GET', headers=headers)

            # unchanged detection
            resp['-content-hash'] = md5.new(content or '').hexdigest()
            resp['-content-hash'] = md5(content or '').hexdigest()
            if resp.status == 200:
                if resp.fromcache:
                    resp.status = 304
planet/vendor/httplib2/__init__.py (vendored): 589 changed lines
@@ -7,6 +7,9 @@ to conserve bandwidth.

Requires Python 2.3 or later

Changelog:
2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.

"""

__author__ = "Joe Gregorio (joe@bitworking.org)"
@@ -19,14 +22,14 @@ __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
    "Sam Ruby",
    "Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "$Rev: 227 $"
__version__ = "$Rev$"

import re
import sys
import md5
import email
import email.Utils
import email.Message
import email.FeedParser
import StringIO
import gzip
import zlib
@@ -38,10 +41,32 @@ import copy
import calendar
import time
import random
import sha
# remove depracated warning in python2.6
try:
    from hashlib import sha1 as _sha, md5 as _md5
except ImportError:
    import sha
    import md5
    _sha = sha.new
    _md5 = md5.new
import hmac
from gettext import gettext as _
from socket import gaierror
import socket

try:
    import socks
except ImportError:
    socks = None

# Build the appropriate socket wrapper for ssl
try:
    import ssl # python 2.6
    _ssl_wrap_socket = ssl.wrap_socket
except ImportError:
    def _ssl_wrap_socket(sock, key_file, cert_file):
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)


if sys.version_info >= (2,3):
    from iri2uri import iri2uri
@@ -49,7 +74,12 @@ else:
    def iri2uri(uri):
        return uri

__all__ = ['Http', 'Response', 'HttpLib2Error',
def has_timeout(timeout): # python 2.6
    if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
        return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT)
    return (timeout is not None)

__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
  'debuglevel']
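The has_timeout() helper exists because Python 2.6 introduced the sentinel socket._GLOBAL_DEFAULT_TIMEOUT (used as the default timeout in httplib and socket.create_connection), so "timeout is not None" is no longer enough to detect an explicitly requested timeout. A standalone illustration (not part of the patch):

    import socket

    def has_timeout(timeout):   # same logic as the helper added above
        if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
            return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT)
        return (timeout is not None)

    print has_timeout(None)    # False: caller did not ask for a timeout
    print has_timeout(20.0)    # True: explicit timeout in seconds
    if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
        print has_timeout(socket._GLOBAL_DEFAULT_TIMEOUT)   # False: the 2.6 default sentinel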
@@ -58,6 +88,7 @@ __all__ = ['Http', 'Response', 'HttpLib2Error',
# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0


# Python 2.3 support
if sys.version_info < (2,4):
    def sorted(seq):
@@ -77,11 +108,20 @@ if not hasattr(httplib.HTTPResponse, 'getheaders'):
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass

class RedirectMissingLocation(HttpLib2Error): pass
class RedirectLimit(HttpLib2Error): pass
class FailedToDecompressContent(HttpLib2Error): pass
class UnimplementedDigestAuthOptionError(HttpLib2Error): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2Error): pass
# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)

class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
class RedirectLimit(HttpLib2ErrorWithResponse): pass
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass

class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass

@@ -159,11 +199,11 @@ def safename(filename):
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except:
    except UnicodeError:
        pass
    if isinstance(filename,unicode):
        filename=filename.encode('utf-8')
    filemd5 = md5.new(filename).hexdigest()
    filemd5 = _md5(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

@@ -180,8 +220,8 @@ def _parse_cache_control(headers):
    retval = {}
    if headers.has_key('cache-control'):
        parts = headers['cache-control'].split(',')
        parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")]
        parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")]
        parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
        parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
        retval = dict(parts_with_args + parts_wo_args)
    return retval

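Cache-Control directive names are case-insensitive, and a value may itself contain '=', which is why the new parsing lowercases the names and splits only on the first '='. A quick illustration against the updated module (_parse_cache_control is an internal helper; the header value is illustrative):

    import httplib2
    print httplib2._parse_cache_control({'cache-control': 'Max-Age=3600, No-Cache'})
    # -> {'max-age': '3600', 'no-cache': 1}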
@@ -275,22 +315,25 @@ def _entry_disposition(response_headers, request_headers):
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except:
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            freshness_lifetime = max(0, calendar.timegm(expires) - date)
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except:
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            try:
                min_fresh = int(cc['min-fresh'])
            except:
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
@@ -307,10 +350,12 @@ def _decompressContent(response, new_content):
        if encoding == 'deflate':
            content = zlib.decompress(content)
        response['content-length'] = str(len(content))
        # Record the historical presence of the encoding in a way the won't interfere.
        response['-content-encoding'] = response['content-encoding']
        del response['content-encoding']
    except:
    except IOError:
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'))
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content

def _updateCache(request_headers, response_headers, content, cache, cachekey):
@@ -339,11 +384,11 @@ def _updateCache(request_headers, response_headers, content, cache, cachekey):
        cache.set(cachekey, text)

def _cnonce():
    dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
    dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
    return dig[:16]

def _wsse_username_token(cnonce, iso_now, password):
    return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
    return base64.encodestring(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()


# For credentials we need two things, first
@@ -354,7 +399,7 @@ def _wsse_username_token(cnonce, iso_now, password):
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.

class Authentication:
class Authentication(object):
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
@@ -405,11 +450,11 @@ class DigestAuthentication(Authentication):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop')
        qop = self.challenge.get('qop', 'auth')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
@@ -417,7 +462,7 @@ class DigestAuthentication(Authentication):

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        H = lambda x: md5.new(x).hexdigest()
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
@@ -477,13 +522,13 @@ class HmacDigestAuthentication(Authentication):
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = md5
            self.hashmod = _md5
        else:
            self.hashmod = sha
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = md5
            self.pwhashmod = _md5
        else:
            self.pwhashmod = sha
            self.pwhashmod = _sha
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
@@ -545,8 +590,17 @@ class GoogleLoginAuthentication(Authentication):
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Bloggger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service='cl', source=headers['user-agent'])
        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
@@ -571,10 +625,7 @@ AUTH_SCHEME_CLASSES = {

AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]

def _md5(s):
    return

class FileCache:
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
@@ -589,16 +640,16 @@ class FileCache:
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            f = file(cacheFullPath, "r")
            f = file(cacheFullPath, "rb")
            retval = f.read()
            f.close()
        except:
        except IOError:
            pass
        return retval

    def set(self, key, value):
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = file(cacheFullPath, "w")
        f = file(cacheFullPath, "wb")
        f.write(value)
        f.close()

@@ -607,12 +658,131 @@ class FileCache:
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)

class Http:
    """An HTTP client that handles all
    methods, caching, ETags, compression,
    HTTPS, Basic, Digest, WSSE, etc.
class Credentials(object):
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        self.credentials = []

    def iter(self, domain):
        for (cdomain, name, password) in self.credentials:
            if cdomain == "" or domain == cdomain:
                yield (name, password)

class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    pass


class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass

    def astuple(self):
        return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
            self.proxy_user, self.proxy_pass)

    def isgood(self):
        return socks and (self.proxy_host != None) and (self.proxy_port != None)

class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """HTTPConnection subclass that supports timeouts"""

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        msg = "getaddrinfo returns an empty list"
        for res in socket.getaddrinfo(self.host, self.port, 0,
                socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)

                self.sock.connect(sa)
            except socket.error, msg:
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket.error, msg

class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    "This class allows communication via SSL."

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                cert_file=cert_file, strict=strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        "Connect to a host on a given (SSL) port."

        if self.proxy_info and self.proxy_info.isgood():
            sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setproxy(*self.proxy_info.astuple())
        else:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        if has_timeout(self.timeout):
            sock.settimeout(self.timeout)
        sock.connect((self.host, self.port))
        self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file)


class Http(object):
    """An HTTP client that handles:
- all methods
- caching
- ETags
- compression,
- HTTPS
- Basic
- Digest
- WSSE

and more.
    """
    def __init__(self, cache=None):
    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """The value of proxy_info is a ProxyInfo instance.

If 'cache' is a string then it is used as a directory name
for a disk cache. Otherwise it must be an object that supports
the same interface as FileCache."""
        self.proxy_info = proxy_info
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
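The ProxyInfo class above, together with the new Http.__init__(cache, timeout, proxy_info) signature shown here, lets callers set a per-connection timeout and route traffic through a proxy. A small usage sketch (host, port and cache directory are illustrative; requires the optional socks module):

    import httplib2, socks   # socks: the SocksiPy module, if available

    proxy = httplib2.ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
                               proxy_host='localhost', proxy_port=8000)
    h = httplib2.Http('.cache', timeout=10, proxy_info=proxy)
    resp, content = h.request('http://example.org/')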
@@ -622,45 +792,72 @@ class Http:
        else:
            self.cache = cache

        # tuples of name, password
        self.credentials = []
        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirecs are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials:
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password):
    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.append((name, password))
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials = []
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        for i in range(2):
            try:
                conn.request(method, request_uri, body, headers)
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except (socket.error, httplib.HTTPException):
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except gaierror:
                raise ServerNotFoundError("Unable to find the server at %s" % request_uri)
            except:
            except (socket.error, httplib.HTTPException):
                if i == 0:
                    conn.close()
                    conn.connect()
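With credentials and client certificates now tracked per domain via the add_credentials and add_certificate methods above, the caller-facing API looks roughly like this (names, domains and file paths are illustrative):

    import httplib2

    h = httplib2.Http('.cache')
    h.add_credentials('user', 'secret', 'example.org')                  # only offered to example.org
    h.add_certificate('client.key', 'client.crt', 'secure.example.org')
    resp, content = h.request('https://secure.example.org/protected')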
@@ -668,11 +865,13 @@ class Http:
                else:
                    raise
            else:
                content = response.read()
                content = ""
                if method != "HEAD":
                    content = response.read()
                response = Response(response)
                content = _decompressContent(response, content)

                break;
                if method != "HEAD":
                    content = _decompressContent(response, content)
            break
        return (response, content)


@@ -702,13 +901,13 @@ class Http:
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or method in ["GET", "HEAD"]) or response.status == 303:
            if response.status in [300, 301, 302, 303, 307]:
        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."))
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
@@ -733,7 +932,7 @@ class Http:
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit( _("Redirected more times than rediection_limit allows."))
                    raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
            elif response.status in [200, 203] and method == "GET":
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
@@ -742,7 +941,13 @@ class Http:

        return (response, content)

    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS):

# Need to catch and rebrand some exceptions
# Then need to optionally turn all exceptions into status codes
# including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
@@ -763,115 +968,164 @@ The return value is a tuple of (response, content), the first
being and instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        if headers is None:
            headers = {}
        else:
            headers = _normalize_headers(headers)

        if not headers.has_key('user-agent'):
            headers['user-agent'] = "Python-httplib2/%s" % __version__

        uri = iri2uri(uri)

        (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

        if not self.connections.has_key(scheme+":"+authority):
            connection_type = (scheme == 'https') and httplib.HTTPSConnection or httplib.HTTPConnection
            conn = self.connections[scheme+":"+authority] = connection_type(authority)
            conn.set_debuglevel(debuglevel)
        else:
            conn = self.connections[scheme+":"+authority]

        if method in ["GET", "HEAD"] and 'range' not in headers:
            headers['accept-encoding'] = 'compress, gzip'

        info = email.Message.Message()
        cached_value = None
        if self.cache:
            cachekey = defrag_uri
            cached_value = self.cache.get(cachekey)
            if cached_value:
                try:
                    info = email.message_from_string(cached_value)
                    content = cached_value.split('\r\n\r\n', 1)[1]
                except Exception, e:
                    self.cache.delete(cachekey)
                    cachekey = None
                    cached_value = None
        else:
            cachekey = None

        if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
            # http://www.w3.org/1999/04/Editing/
            headers['if-match'] = info['etag']

        if method not in ["GET", "HEAD"] and self.cache and cachekey:
            # RFC 2616 Section 13.10
            self.cache.delete(cachekey)

        if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
            if info.has_key('-x-permanent-redirect-url'):
                # Should cached permanent redirects be counted in our redirection count? For now, yes.
                (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                response.previous = Response(info)
                response.previous.fromcache = True
        try:
            if headers is None:
                headers = {}
            else:
                # Determine our course of action:
                # Is the cached entry fresh or stale?
                # Has the client requested a non-cached response?
                #
                # There seems to be three possible answers:
                # 1. [FRESH] Return the cache entry w/o doing a GET
                # 2. [STALE] Do the GET (but add in cache validators if available)
                # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                entry_disposition = _entry_disposition(info, headers)
                headers = _normalize_headers(headers)

                if entry_disposition == "FRESH":
                    if not cached_value:
                        info['status'] = '504'
                        content = ""
                    response = Response(info)
                    if cached_value:
                        response.fromcache = True
                    return (response, content)
            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % __version__

                if entry_disposition == "STALE":
                    if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                        headers['if-none-match'] = info['etag']
                    if info.has_key('last-modified') and not 'last-modified' in headers:
                        headers['if-modified-since'] = info['last-modified']
                elif entry_disposition == "TRANSPARENT":
                    pass
            uri = iri2uri(uri)

                (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]

            if response.status == 304 and method == "GET":
                # Rewrite the cache entry with the new end-to-end headers
                # Take all headers that are in response
                # and overwrite their values in info.
                # unless they are hop-by-hop, or are listed in the connection header.

                for key in _get_end2end_headers(response):
                    info[key] = response[key]
                merged_response = Response(info)
                if hasattr(response, "_stale_digest"):
                    merged_response._stale_digest = response._stale_digest
                try:
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                except:
                    print locals()
                    raise
                response = merged_response
                response.status = 200
                response.fromcache = True

            elif response.status == 200:
                content = new_content
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                else:
                    conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'deflate, gzip'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # info = email.message_from_string(cached_value)
                    #
                    # Need to replace the line above with the kludge below
                    # to fix the non-existent bug not fixed in this
                    # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
                    try:
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except IndexError:
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)
                content = new_content
        else:
            (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    # Is the cached entry fresh or stale?
                    # Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                        "content-type": "text/plain",
                        "status": "408",
                        "content-length": len(content)
                        })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response( {
                        "content-type": "text/plain",
                        "status": "400",
                        "content-length": len(content)
                        })
                    response.reason = "Bad Request"
            else:
                raise


        return (response, content)

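The new except Exception handler, together with force_exception_to_status_code (initialised in __init__ above), lets callers turn connection failures and timeouts into synthesized 400/408/500 responses instead of raised exceptions. A brief sketch of that behaviour (URL is illustrative):

    import httplib2

    h = httplib2.Http(timeout=5)
    h.force_exception_to_status_code = True
    resp, content = h.request('http://no-such-host.invalid/')
    print resp.status, resp.reason   # e.g. 400, with the error text in content, no exception raised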
@@ -898,7 +1152,7 @@ class Response(dict):
        # an httplib.HTTPResponse object.
        if isinstance(info, httplib.HTTPResponse):
            for key, value in info.getheaders():
                self[key] = value
                self[key.lower()] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
@@ -907,11 +1161,14 @@ class Response(dict):
            for key, value in info.items():
                self[key] = value
            self.status = int(self['status'])
        else:
            for key, value in info.iteritems():
                self[key] = value
            self.status = int(self.get('status', self.status))


    def __getattr__(self, name):
        if name == 'dict':
            return self
        else:
            raise AttributeError, name