Pull in the latest httplib2

This commit is contained in:
Sam Ruby 2007-01-23 20:09:58 -05:00
parent 77d15d22cf
commit 32a1c49090
3 changed files with 154 additions and 25 deletions

View File

@ -16,11 +16,13 @@ __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
"Xavier Verges Farrero",
"Jonathan Feinberg",
"Blair Zajac",
"Sam Ruby"]
"Sam Ruby",
"Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "$Rev: 217 $"
__version__ = "$Rev: 227 $"
import re
import sys
import md5
import email
import email.Utils
@ -41,6 +43,12 @@ import hmac
from gettext import gettext as _
from socket import gaierror
if sys.version_info >= (2,3):
from iri2uri import iri2uri
else:
def iri2uri(uri):
return uri
__all__ = ['Http', 'Response', 'HttpLib2Error',
'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
@ -51,7 +59,7 @@ __all__ = ['Http', 'Response', 'HttpLib2Error',
debuglevel = 0
# Python 2.3 support
if 'sorted' not in __builtins__:
if sys.version_info < (2,4):
def sorted(seq):
seq.sort()
return seq
@ -60,7 +68,6 @@ if 'sorted' not in __builtins__:
def HTTPResponse__getheaders(self):
"""Return list of (header, value) tuples."""
if self.msg is None:
print "================================"
raise httplib.ResponseNotReady()
return self.msg.items()
@ -75,6 +82,8 @@ class RedirectLimit(HttpLib2Error): pass
class FailedToDecompressContent(HttpLib2Error): pass
class UnimplementedDigestAuthOptionError(HttpLib2Error): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2Error): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass
# Open Items:
# -----------
@ -118,6 +127,8 @@ def parse_uri(uri):
def urlnorm(uri):
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
authority = authority.lower()
scheme = scheme.lower()
if not path:
@ -125,6 +136,7 @@ def urlnorm(uri):
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
scheme = scheme.lower()
defrag_uri = scheme + "://" + authority + request_uri
return scheme, authority, request_uri, defrag_uri
@ -143,9 +155,10 @@ def safename(filename):
try:
if re_url_scheme.match(filename):
if isinstance(filename,str):
filename=filename.decode('utf-8').encode('idna')
filename = filename.decode('utf-8')
filename = filename.encode('idna')
else:
filename=filename.encode('idna')
filename = filename.encode('idna')
except:
pass
if isinstance(filename,unicode):
@ -260,16 +273,26 @@ def _entry_disposition(response_headers, request_headers):
now = time.time()
current_age = max(0, now - date)
if cc_response.has_key('max-age'):
try:
freshness_lifetime = int(cc_response['max-age'])
except:
freshness_lifetime = 0
elif response_headers.has_key('expires'):
expires = email.Utils.parsedate_tz(response_headers['expires'])
freshness_lifetime = max(0, calendar.timegm(expires) - date)
else:
freshness_lifetime = 0
if cc.has_key('max-age'):
freshness_lifetime = min(freshness_lifetime, int(cc['max-age']))
try:
freshness_lifetime = int(cc['max-age'])
except:
freshness_lifetime = 0
if cc.has_key('min-fresh'):
current_age += int(cc['min-fresh'])
try:
min_fresh = int(cc['min-fresh'])
except:
min_fresh = 0
current_age += min_fresh
if freshness_lifetime > current_age:
retval = "FRESH"
return retval
@ -418,13 +441,13 @@ class DigestAuthentication(Authentication):
def response(self, response, content):
if not response.has_key('authentication-info'):
challenge = _parse_www_authenticate(response, 'www-authenticate')['digest']
challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
if 'true' == challenge.get('stale'):
self.challenge['nonce'] = challenge['nonce']
self.challenge['nc'] = 1
return True
else:
updated_challenge = _parse_www_authenticate(response, 'authentication-info')['digest']
updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})
if updated_challenge.has_key('nextnonce'):
self.challenge['nonce'] = updated_challenge['nextnonce']
@ -440,7 +463,6 @@ class HmacDigestAuthentication(Authentication):
Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
challenge = _parse_www_authenticate(response, 'www-authenticate')
self.challenge = challenge['hmacdigest']
print self.challenge
# TODO: self.challenge['domain']
self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
if self.challenge['reason'] not in ['unauthorized', 'integrity']:
@ -466,9 +488,6 @@ class HmacDigestAuthentication(Authentication):
self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
":", self.challenge['realm']
])
print response['www-authenticate']
print "".join([self.credentials[1], self.challenge['salt']])
print "key_str = %s" % self.key
self.key = self.pwhashmod.new(self.key).hexdigest().lower()
def request(self, method, request_uri, headers, content):
@ -479,8 +498,6 @@ class HmacDigestAuthentication(Authentication):
created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
cnonce = _cnonce()
request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
print "key = %s" % self.key
print "msg = %s" % request_digest
request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
self.credentials[0],
@ -641,6 +658,8 @@ class Http:
try:
conn.request(method, request_uri, body, headers)
response = conn.getresponse()
except gaierror:
raise ServerNotFoundError("Unable to find the server at %s" % request_uri)
except:
if i == 0:
conn.close()
@ -752,6 +771,8 @@ a string that contains the response entity body.
if not headers.has_key('user-agent'):
headers['user-agent'] = "Python-httplib2/%s" % __version__
uri = iri2uri(uri)
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
if not self.connections.has_key(scheme+":"+authority):
@ -780,7 +801,7 @@ a string that contains the response entity body.
else:
cachekey = None
if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag:
if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
# http://www.w3.org/1999/04/Editing/
headers['if-match'] = info['etag']
@ -815,9 +836,9 @@ a string that contains the response entity body.
return (response, content)
if entry_disposition == "STALE":
if info.has_key('etag') and not self.ignore_etag:
if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
headers['if-none-match'] = info['etag']
if info.has_key('last-modified'):
if info.has_key('last-modified') and not 'last-modified' in headers:
headers['if-modified-since'] = info['last-modified']
elif entry_disposition == "TRANSPARENT":
pass

110
planet/httplib2/iri2uri.py Normal file
View File

@ -0,0 +1,110 @@
"""
iri2uri
Converts an IRI to a URI.
"""
__author__ = "Joe Gregorio (joe@bitworking.org)"
__copyright__ = "Copyright 2006, Joe Gregorio"
__contributors__ = []
__version__ = "1.0.0"
__license__ = "MIT"
__history__ = """
"""
import urlparse
# Convert an IRI to a URI following the rules in RFC 3987
#
# The characters we need to enocde and escape are defined in the spec:
#
# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
# / %xD0000-DFFFD / %xE1000-EFFFD
escape_range = [
(0xA0, 0xD7FF ),
(0xE000, 0xF8FF ),
(0xF900, 0xFDCF ),
(0xFDF0, 0xFFEF),
(0x10000, 0x1FFFD ),
(0x20000, 0x2FFFD ),
(0x30000, 0x3FFFD),
(0x40000, 0x4FFFD ),
(0x50000, 0x5FFFD ),
(0x60000, 0x6FFFD),
(0x70000, 0x7FFFD ),
(0x80000, 0x8FFFD ),
(0x90000, 0x9FFFD),
(0xA0000, 0xAFFFD ),
(0xB0000, 0xBFFFD ),
(0xC0000, 0xCFFFD),
(0xD0000, 0xDFFFD ),
(0xE1000, 0xEFFFD),
(0xF0000, 0xFFFFD ),
(0x100000, 0x10FFFD)
]
def encode(c):
retval = c
i = ord(c)
for low, high in escape_range:
if i < low:
break
if i >= low and i <= high:
retval = "".join(["%%%2X" % ord(o) for o in c.encode('utf-8')])
break
return retval
def iri2uri(uri):
"""Convert an IRI to a URI. Note that IRIs must be
passed in a unicode strings. That is, do not utf-8 encode
the IRI before passing it into the function."""
if isinstance(uri ,unicode):
(scheme, authority, path, query, fragment) = urlparse.urlsplit(uri)
authority = authority.encode('idna')
# For each character in 'ucschar' or 'iprivate'
# 1. encode as utf-8
# 2. then %-encode each octet of that utf-8
uri = urlparse.urlunsplit((scheme, authority, path, query, fragment))
uri = "".join([encode(c) for c in uri])
return uri
if __name__ == "__main__":
import unittest
class Test(unittest.TestCase):
def test_uris(self):
"""Test that URIs are invariant under the transformation."""
invariant = [
u"ftp://ftp.is.co.za/rfc/rfc1808.txt",
u"http://www.ietf.org/rfc/rfc2396.txt",
u"ldap://[2001:db8::7]/c=GB?objectClass?one",
u"mailto:John.Doe@example.com",
u"news:comp.infosystems.www.servers.unix",
u"tel:+1-816-555-1212",
u"telnet://192.0.2.16:80/",
u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ]
for uri in invariant:
self.assertEqual(uri, iri2uri(uri))
def test_iri(self):
""" Test that the right type of escaping is done for each part of the URI."""
self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}"))
self.assertEqual("http://bitworking.org/?fred=%E2%98%84", iri2uri(u"http://bitworking.org/?fred=\N{COMET}"))
self.assertEqual("http://bitworking.org/#%E2%98%84", iri2uri(u"http://bitworking.org/#\N{COMET}"))
self.assertEqual("#%E2%98%84", iri2uri(u"#\N{COMET}"))
self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}"))
self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}")))
self.assertNotEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}".encode('utf-8')))
unittest.main()

View File

@ -254,7 +254,6 @@ def writeCache(feed_uri, feed_info, data):
def httpThread(thread_index, input_queue, output_queue, log):
import httplib2, md5
from socket import gaierror, error
from httplib import BadStatusLine
h = httplib2.Http(config.http_cache_directory())
@ -304,13 +303,12 @@ def httpThread(thread_index, input_queue, output_queue, log):
if resp.has_key('content-encoding'):
del resp['content-encoding']
setattr(feed, 'headers', resp)
except gaierror:
log.error("Fail to resolve server name %s via %d",
uri, thread_index)
except BadStatusLine:
log.error("Bad Status Line received for %s via %d",
uri, thread_index)
except error, e:
except httplib2.HttpLib2Error, e:
log.error("HttpLib2Error: %s via %d", str(e), thread_index)
except socket.error, e:
if e.__class__.__name__.lower()=='timeout':
feed.headers['status'] = '408'
log.warn("Timeout in thread-%d", thread_index)