1from __future__ import generators
2
3"""
4httplib2
5
6A caching http interface that supports ETags and gzip
7to conserve bandwidth.
8
9Requires Python 2.3 or later
10
11Changelog:
122007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
13
14"""
15
16__author__ = "Joe Gregorio (joe@bitworking.org)"
17__copyright__ = "Copyright 2006, Joe Gregorio"
18__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
19                    "James Antill",
20                    "Xavier Verges Farrero",
21                    "Jonathan Feinberg",
22                    "Blair Zajac",
23                    "Sam Ruby",
24                    "Louis Nyffenegger"]
25__license__ = "MIT"
26__version__ = "$Rev: 259 $"
27
28import re
29import sys
30
31try:
32    import hashlib
33
34    md = hashlib.md5()
35except ImportError:
36    # for Python << 2.5
37    import md5
38
39    md = md5.new()
40import email
41import email.Utils
42import email.Message
43import StringIO
44import gzip
45import zlib
46import httplib
47import urlparse
48import base64
49import os
50import copy
51import calendar
52import time
53import random
54
55try:
56    import hashlib
57
58    md = hashlib.md5()
59except ImportError:
60    # for Python << 2.5
61    import md5
62
63    md = md5.new()
64import hmac
65from gettext import gettext as _
66import socket
67
68try:
69    import socks
70except ImportError:
71    socks = None
72
73if sys.version_info >= (2, 3):
74    from iri2uri import iri2uri
75else:
76    def iri2uri(uri):
77        return uri
78
79__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
80           'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
81           'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
82           'debuglevel']
83
84
85# The httplib debug level, set to a non-zero value to get debug output
86debuglevel = 0
87
# Python 2.3 support: the sorted() builtin first appeared in Python 2.4.
if sys.version_info < (2, 4):
    def sorted(seq):
        """Minimal stand-in for the 2.4 builtin: return a new sorted list.

        Copies the input first so the caller's sequence is not mutated,
        matching the builtin's contract (the old shim sorted in place).
        """
        seq = list(seq)
        seq.sort()
        return seq
93
# Python 2.3 support: getheaders() was only added to HTTPResponse in 2.4.
def HTTPResponse__getheaders(self):
    """Return the response headers as a list of (header, value) tuples."""
    msg = self.msg
    if msg is None:
        raise httplib.ResponseNotReady()
    return msg.items()
100
101if not hasattr(httplib.HTTPResponse, 'getheaders'):
102    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
103
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        # Keep the offending response and body around so a caller that
        # catches the error can still inspect or recover them.
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)


# A redirect response arrived without the required Location header.
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass


# Too many redirects were followed (raised elsewhere in this module).
class RedirectLimit(HttpLib2ErrorWithResponse): pass


# The body claimed a content-encoding it could not be decoded with.
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass


# The Digest auth challenge used an option this client does not implement.
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass


# The HMACDigest auth challenge used an option this client does not implement.
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass


# A caller passed a URI without a scheme/authority (see urlnorm()).
class RelativeURIError(HttpLib2Error): pass


# The target host could not be reached (raised elsewhere in this module).
class ServerNotFoundError(HttpLib2Error): pass
135
136# Open Items:
137# -----------
138# Proxy support
139
140# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
141
142# Pluggable cache storage (supports storing the cache in
143#   flat files by default. We need a plug-in architecture
144#   that can support Berkeley DB and Squid)
145
146# == Known Issues ==
147# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
148# Does not handle Cache-Control: max-stale
149# Does not use Age: headers when calculating cache freshness.
150
151
152# The number of redirections to follow before giving up.
153# Note that only GET redirects are automatically followed.
154# Will also honor 301 requests by saving that info and never
155# requesting that URI again.
156DEFAULT_MAX_REDIRECTS = 5
157
158# Which headers are hop-by-hop headers by default
159HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers',
160              'transfer-encoding', 'upgrade']
161
162def _get_end2end_headers(response):
163    hopbyhop = list(HOP_BY_HOP)
164    hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
165    return [header for header in response.keys() if header not in hopbyhop]
166
# Regex from Appendix B of RFC 3986 for splitting a URI into components.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Split *uri* into (scheme, authority, path, query, fragment).

    Uses the regex given in Appendix B of RFC 3986; components that are
    absent in the URI come back as None.
    """
    m = URI.match(uri)
    _, scheme, _, authority, path, _, query, _, fragment = m.groups()
    return scheme, authority, path, query, fragment
176
177
def urlnorm(uri):
    """Normalize *uri*; return (scheme, authority, request_uri, defrag_uri).

    The scheme and authority are lowercased, an empty path becomes "/",
    and the fragment is dropped from defrag_uri.

    Raises RelativeURIError if the URI lacks a scheme or authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # Lowercase exactly once (the original lowercased scheme twice).
    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
192
193
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r'^\w+://')
re_slash = re.compile(r'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    # Pick an MD5 constructor locally: the module-level fallback only binds
    # ``md5`` on pre-2.5 pythons, so the old md5.new() call here raised
    # NameError whenever hashlib was available.
    try:
        import hashlib
        _new_md5 = hashlib.md5
    except ImportError:
        # for Python << 2.5
        import md5
        _new_md5 = md5.new

    try:
        if re_url_scheme.match(filename):
            if isinstance(filename, bytes):
                # Python 2 byte string: decode before the IDNA encode
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        pass
    # Normalize to the native str type (UTF-8 byte string on Python 2)
    # without referencing the Python-2-only ``unicode`` name.
    if not isinstance(filename, str):
        if str is bytes:
            filename = filename.encode('utf-8')
        else:
            filename = filename.decode('utf-8')
    # Hash the full, unstripped name so URIs that collapse to the same
    # stripped form still get distinct cache entries.
    if isinstance(filename, bytes):
        filemd5 = _new_md5(filename).hexdigest()
    else:
        filemd5 = _new_md5(filename.encode('utf-8')).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename) > 200:
        filename = filename[:200]
    return ",".join((filename, filemd5))
224
225NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
226
227def _normalize_headers(headers):
228    return dict([(key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip())  for (key, value) in headers.iteritems()])
229
230
231def _parse_cache_control(headers):
232    retval = {}
233    if headers.has_key('cache-control'):
234        parts = headers['cache-control'].split(',')
235        parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")]
236        parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")]
237        retval = dict(parts_with_args + parts_wo_args)
238    return retval
239
240# Whether to use a strict mode to parse WWW-Authenticate headers
241# Might lead to bad results in case of ill-formed header value,
242# so disabled by default, falling back to relaxed parsing.
243# Set to true to turn on, usefull for testing servers.
244USE_WWW_AUTH_STRICT_PARSING = 0
245
246# In regex below:
247#    [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+             matches a "token" as defined by HTTP
248#    "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?"    matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
249# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
250#    \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
251WWW_AUTH_STRICT = re.compile(
252    r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
253WWW_AUTH_RELAXED = re.compile(
254    r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
255UNQUOTE_PAIRS = re.compile(r'\\(.)')
256
257def _parse_www_authenticate(headers, headername='www-authenticate'):
258    """Returns a dictionary of dictionaries, one dict
259    per auth_scheme."""
260    retval = {}
261    if headers.has_key(headername):
262        authenticate = headers[headername].strip()
263        www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
264        while authenticate:
265            # Break off the scheme at the beginning of the line
266            if headername == 'authentication-info':
267                (auth_scheme, the_rest) = ('digest', authenticate)
268            else:
269                (auth_scheme, the_rest) = authenticate.split(" ", 1)
270                # Now loop over all the key value pairs that come after the scheme,
271            # being careful not to roll into the next scheme
272            match = www_auth.search(the_rest)
273            auth_params = {}
274            while match:
275                if match and len(match.groups()) == 3:
276                    (key, value, the_rest) = match.groups()
277                    auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1',
278                                                                 value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
279                match = www_auth.search(the_rest)
280            retval[auth_scheme.lower()] = auth_params
281            authenticate = the_rest.strip()
282    return retval
283
284
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Not that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of "STALE", "TRANSPARENT" or "FRESH".
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # Use 'in' throughout (dict.has_key is deprecated); L319 already did.
    if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if 'max-age' in cc_response:
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif 'expires' in response_headers:
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if expires is None:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides the server-derived lifetime.
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
358
359
def _decompressContent(response, new_content):
    """Decompress *new_content* per the response's Content-Encoding.

    Supports gzip and deflate. On success the content-length header is
    corrected and content-encoding is removed. Raises
    FailedToDecompressContent when the body does not actually decompress.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            response['content-length'] = str(len(content))
            del response['content-encoding']
    except (IOError, zlib.error):
        # zlib raises zlib.error (not IOError) on corrupt deflate data;
        # catch both so a bad body always surfaces as our exception type.
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding')
            , response, content)
    return content
377
378
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) a response body plus headers in *cache* under *cachekey*.

    Honors 'no-store' from either side by deleting the entry. A 304 is
    recorded as a 200, since the cached entry has just been revalidated.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if 'no-store' in cc or 'no-store' in cc_response:
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.items():
                if key not in ['status', 'content-encoding', 'transfer-encoding']:
                    info[key] = value

            status = response_headers.status
            if status == 304:
                status = 200

            # Use the normalized status: writing response_headers.status here
            # (as before) discarded the 304->200 mapping, so revalidated
            # entries replayed "status: 304" to later cache readers.
            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize bare CR or LF line endings to CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
403
404
405def _cnonce():
406    dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
407    return dig[:16]
408
409
410def _wsse_username_token(cnonce, iso_now, password):
411    return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
412
413
# For credentials we need two things, first
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
# Then we also need a list of URIs that have already demanded authentication
# That list is tricky since sub-URIs can take the same auth, or the
# auth scheme may change as you descend the tree.
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
421
class Authentication(object):
    """Base class holding one site's authentication state.

    Subclasses implement request() to decorate outgoing requests and may
    override the response() hook to absorb server-supplied updates.

    NOTE(review): __init__ stores the server response on ``self.response``,
    which shadows the response() method on instances — confirm callers only
    use response() via subclasses that rely on the same layout.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http
        self.response = response
        self.headers = headers
        self.content = content

    def depth(self, request_uri):
        """Number of path segments *request_uri* sits below our root path."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        if host != self.host:
            return False
        return path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return True if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
456
457
class BasicAuthentication(Authentication):
    """HTTP Basic auth: send base64(user:password) on every request."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.encodestring(userpass).strip()
466
467
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response)
        self.challenge = challenge['digest']
        # NOTE(review): if the server omits qop entirely, .get('qop') is
        # None and the split() below raises AttributeError — confirm all
        # target servers send a qop directive.
        qop = self.challenge.get('qop')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 = username:realm:password (RFC 2617); hashed per request below.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers"""
        # NOTE(review): ``md5`` is only bound by the pre-2.5 fallback
        # import at module top; under hashlib this raises NameError —
        # verify on Python >= 2.5.
        H = lambda x: md5.new(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        # response-digest = KD(H(A1), nonce:nc:cnonce:qop:H(A2)), RFC 2617 3.2.2.1
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                                                                     '%08x' % self.challenge['nc'],
                                                                     self.challenge['cnonce'],
                                                                     self.challenge['qop'], H(A2)
        ))
        headers[
        'Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
        self.credentials[0],
        self.challenge['realm'],
        self.challenge['nonce'],
        request_uri,
        self.challenge['algorithm'],
        request_digest,
        self.challenge['qop'],
        self.challenge['nc'],
        self.challenge['cnonce'],
        )
        # Bump the nonce count so the server can detect replays.
        self.challenge['nc'] += 1

    def response(self, response, content):
        if not response.has_key('authentication-info'):
            # No Authentication-Info header: check for a stale-nonce challenge,
            # which asks us to retry with the fresh nonce.
            challenge = _parse_www_authenticate(response).get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                # Server rotated the nonce; restart the nonce count.
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
526
527
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        # Bind hash constructors locally: the old code assigned the bare
        # names ``md5``/``sha`` as digest modules, but ``sha`` is never
        # imported (and ``md5`` only on pre-2.5 pythons), so selecting
        # SHA-1 raised NameError. hmac.new() accepts callables.
        try:
            import hashlib
            md5_new = hashlib.md5
            sha_new = hashlib.sha1
        except ImportError:
            # for Python << 2.5
            import md5
            import sha
            md5_new = md5.new
            sha_new = sha.new
        challenge = _parse_www_authenticate(response)
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        # hashmod/pwhashmod are hash constructors (callables), not modules.
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = md5_new
        else:
            self.hashmod = sha_new
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = md5_new
        else:
            self.pwhashmod = sha_new
        # key = H(username : H(password + salt) : realm), then hashed again.
        self.key = "".join([self.credentials[0], ":",
                            self.pwhashmod(
                                "".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                            ":", self.challenge['realm']
        ])
        self.key = self.pwhashmod(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        # hmac.new accepts a callable digest constructor for digestmod.
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = (
            'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
            ))

    def response(self, response, content):
        # A reason of 'integrity' or 'stale' asks the client to retry.
        challenge = _parse_www_authenticate(response).get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
593
594
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        # PasswordDigest per the WSSE UsernameToken profile.
        digest = _wsse_username_token(nonce, created, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
            self.credentials[0],
            digest,
            nonce,
            created)
619
620
class GoogleLoginAuthentication(Authentication):
    # Implements Google's ClientLogin protocol: __init__ trades the user's
    # credentials for an Auth token via a network POST, so constructing
    # this object performs I/O.
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response)
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and  request_uri.find("calendar") > 0:
            service = "cl"
            # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST",
                                          body=urlencode(auth),
                                          headers={'Content-Type': 'application/x-www-form-urlencoded'})
        # Response body is key=value lines; keep the first '=' split only
        # since the Auth token itself may contain '='.
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login refused: record an empty token rather than failing here.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
651
652
# Maps auth scheme name (lowercased) -> Authentication subclass handling it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several schemes at once.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
662
663def _md5(s):
664    return
665
666
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        # cache: directory path, created if missing.
        # safe: maps a cache key (URI) to a filesystem-safe filename.
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached value for *key*, or None if absent/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # open() instead of the Python-2-only file() builtin; close the
            # handle even if read() fails.
            f = open(cacheFullPath, "r")
            try:
                retval = f.read()
            finally:
                f.close()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store *value* under *key*, overwriting any existing entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = open(cacheFullPath, "w")
        try:
            f.write(value)
        finally:
            f.close()

    def delete(self, key):
        """Remove *key* from the cache; a no-op if it is not present."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
700
701
class Credentials(object):
    """A small registry of (domain, name, password) triples.

    An empty domain acts as a wildcard matching every host.
    """

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password* for *domain* ("" = any domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every registered credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs whose domain matches *domain*."""
        for entry in self.credentials:
            cdomain, name, password = entry
            if cdomain == "" or domain == cdomain:
                yield (name, password)
716
717
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert.

    The inherited add()/iter() therefore take and yield (key, cert) pairs.
    """
    pass
722
723
class ProxyInfo(object):
    """Collect information required to use a proxy."""

    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

  p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return the settings in the order socksocket.setproxy() expects."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when the socks module is importable and host/port are set."""
        return socks and (self.proxy_host != None) and (self.proxy_port != None)
741
742
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """HTTPConnection subclass that supports timeouts"""

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        # timeout: seconds applied to the socket, or None for the default.
        # proxy_info: optional ProxyInfo; only used when its isgood() is true.
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        msg = "getaddrinfo returns an empty list"
        # Try each resolved address in turn until one connects.
        for res in socket.getaddrinfo(self.host, self.port, 0,
                                      socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if self.proxy_info and self.proxy_info.isgood():
                    # Tunnel through the SOCKS proxy instead of connecting directly.
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(*self.proxy_info.astuple())
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    # Different from httplib: support timeouts.
                if self.timeout is not None:
                    self.sock.settimeout(self.timeout)
                    # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
                self.sock.connect(sa)
            except socket.error, msg:
                # This address failed; clean up and fall through to the next one.
                if self.debuglevel > 0:
                    print 'connect fail:', (self.host, self.port)
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every address failed; re-raise the last socket error.
            raise socket.error, msg
781
782
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    "This class allows communication via SSL."

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        # timeout: socket timeout in seconds; None leaves the platform default.
        # proxy_info: optional ProxyInfo; used only when the 'socks' module
        # imported successfully and the info passes isgood().
        self.timeout = timeout
        self.proxy_info = proxy_info
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                                         cert_file=cert_file, strict=strict)

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # Bug fix: the original proxy branch referenced self.sock before any
        # socket existed and then an unbound local 'sock', so any proxied
        # HTTPS connection raised AttributeError/UnboundLocalError. Build
        # the (plain or proxied) TCP socket first, then wrap it in SSL,
        # mirroring HTTPConnectionWithTimeout.connect.
        if self.proxy_info and self.proxy_info.isgood():
            sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setproxy(*self.proxy_info.astuple())
        else:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Different from httplib: support timeouts.
        if self.timeout is not None:
            sock.settimeout(self.timeout)
        sock.connect((self.host, self.port))
        ssl = socket.ssl(sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(sock, ssl)
806
807
class Http(object):
    """An HTTP client that handles:
- all methods
- caching
- ETags
- compression,
- HTTPS
- Basic
- Digest
- WSSE

and more.
    """

    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """The value of proxy_info is a ProxyInfo instance.

If 'cache' is a string then it is used as a directory name
for a disk cache. Otherwise it must be an object that supports
the same interface as FileCache."""
        self.proxy_info = proxy_info
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, never send ETag-based cache validators
        # (If-None-Match / If-Match).
        self.ignore_etag = False

        # If True, exceptions during a request become synthetic 4xx/5xx
        # responses instead of propagating to the caller.
        self.force_exception_to_status_code = False

        # Socket timeout in seconds for new connections; None means the
        # platform default.
        self.timeout = timeout

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
           that can be applied to requests.
        """
        challenges = _parse_www_authenticate(response)
        # Yield an authorization object for every (credential, scheme) pair
        # supported by the server's WWW-Authenticate challenges, trying
        # schemes in AUTH_SCHEME_ORDER (preferred scheme first).
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        # Issue the request at most twice: a second attempt is made only if
        # the first raises an HTTPException (typically a stale keep-alive
        # connection), after closing and reconnecting.
        for i in range(2):
            try:
                conn.request(method, request_uri, body, headers)
                response = conn.getresponse()
            except socket.gaierror:
                # DNS failure is not retryable: rebrand it as our own error.
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except httplib.HTTPException, e:
                if not i:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = response.read()
                response = Response(response)
                if method != "HEAD":
                    # HEAD responses carry no entity body, so there is
                    # nothing to decompress.
                    content = _decompressContent(response, content)

            break
        return response, content


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific previously-established authorization for
        # this host/URI (sorted by path depth), if any.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # Some schemes ask for a retry (e.g. Digest with a stale nonce).
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one is accepted,
            # then remember it for future requests in scope.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303:
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation(_("Redirected but the response is missing a Location: header."),
                                                      response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Cache permanent redirects so future requests can
                        # skip the extra round trip.
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Strip our cache validators before re-issuing the request.
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        # A 303 converts any non-GET/HEAD method into a GET.
                        redirect_method = ((response.status == 303) and (
                        method not in ["GET", "HEAD"])) and "GET" or method
                        (response, content) = self.request(location, redirect_method, body=body, headers=headers,
                                                           redirections=redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit(_("Redirected more times than rediection_limit allows."), response, content)
            elif response.status in [200, 203] and method == "GET":
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return response, content


    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS,
                connection_type=None):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
There is no restriction on the methods allowed.

The 'body' is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
'headers' dictionary.

The maximum number of redirects to follow before raising an
exception is 'redirections'. The default is 5.

The return value is a tuple of (response, content), the first
being an instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                headers = _normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # Reuse one connection object per scheme+authority pair.
            conn_key = scheme + ":" + authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
                                                                        cert_file=certs[0][1], timeout=self.timeout,
                                                                        proxy_info=self.proxy_info)
                else:
                    conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout,
                                                                        proxy_info=self.proxy_info)
                conn.set_debuglevel(debuglevel)

            if method in ["GET", "HEAD"] and 'range' not in headers:
                headers['accept-encoding'] = 'compress, gzip'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # Cache entries are stored as headers + CRLFCRLF + body.
                    info = email.message_from_string(cached_value)
                    try:
                        content = cached_value.split('\r\n\r\n', 1)[1]
                    except IndexError:
                        # Corrupt cache entry: discard it and proceed uncached.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in ["PUT"] and self.cache and info.has_key(
                'etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], headers=headers,
                                                           redirections=redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return response, content

                    if entry_disposition == "STALE":
                        # Add cache validators so the server may answer 304.
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers,
                                                            redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    # Present the merged cached entry to the caller as a 200.
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers,
                                                    redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic Response instead of
                # letting the exception propagate.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response({
                        "content-type": "text/plain",
                        "status": "408",
                        "content-length": len(content)
                    })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response({
                        "content-type": "text/plain",
                        "status": "400",
                        "content-length": len(content)
                    })
                    response.reason = "Bad Request"
            else:
                raise

        return response, content
1154
1155
class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse."""

    # Was this response served out of our local cache?
    fromcache = False

    # HTTP protocol version used by the server: 10 for HTTP/1.0, 11 for HTTP/1.1.
    version = 11

    # Status code returned by the server.
    status = 200

    # Reason phrase returned by the server.
    reason = "Ok"

    # Holds the Response of the previous hop when redirects were followed.
    previous = None

    def __init__(self, info):
        """Populate the header dictionary from 'info', which may be an
        httplib.HTTPResponse, an email.Message, or a plain mapping."""
        if isinstance(info, httplib.HTTPResponse):
            self.update(info.getheaders())
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            self.update(info.items())
            self.status = int(self['status'])
        else:
            self.update(info)
            self.status = int(self.get('status', self.status))


    def __getattr__(self, name):
        # Expose the mapping itself under the 'dict' attribute for
        # backwards compatibility; any other missing attribute is an error.
        if name != 'dict':
            raise AttributeError(name)
        return self