from __future__ import generators

"""
httplib2

A caching http interface that supports ETags and gzip
to conserve bandwidth.

Requires Python 2.3 or later

Changelog:
2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.

"""

__author__ = "Joe Gregorio (joe@bitworking.org)"
__copyright__ = "Copyright 2006, Joe Gregorio"
__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
                    "James Antill",
                    "Xavier Verges Farrero",
                    "Jonathan Feinberg",
                    "Blair Zajac",
                    "Sam Ruby",
                    "Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "$Rev: 259 $"

import re
import sys

# NOTE(review): the original file repeated the hashlib/md5 fallback block
# twice (once here and once again after the email/StringIO imports); the
# duplicate has been removed.  The md5 and sha modules are now imported
# unconditionally as well (they exist, deprecated, through Python 2.7),
# because several helpers below (safename, _cnonce, _wsse_username_token,
# DigestAuthentication, HmacDigestAuthentication) call md5.new()/sha.new()
# directly and would otherwise hit a NameError on Python >= 2.5, where only
# the hashlib branch of the try block runs.
import md5
import sha

try:
    import hashlib

    md = hashlib.md5()
except ImportError:
    # for Python << 2.5
    md = md5.new()

import email
import email.Utils
import email.Message
import StringIO
import gzip
import zlib
import httplib
import urlparse
import base64
import os
import copy
import calendar
import time
import random
import hmac
from gettext import gettext as _
import socket

try:
    import socks
except ImportError:
    # SOCKS proxy support is optional; ProxyInfo.isgood() checks for this.
    socks = None

if sys.version_info >= (2, 3):
    from iri2uri import iri2uri
else:
    def iri2uri(uri):
        """Identity fallback for Python < 2.3 (no IRI support)."""
        return uri

__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
           'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
           'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
           'debuglevel']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0

# Python 2.3 support: the sorted() builtin appeared in 2.4.
if sys.version_info < (2, 4):
    def sorted(seq):
        seq.sort()
        return seq

# Python 2.3 support: HTTPResponse.getheaders() appeared in 2.4.
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()

if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders

# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)


class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass


class RedirectLimit(HttpLib2ErrorWithResponse): pass


class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass


class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass


class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass


class RelativeURIError(HttpLib2Error): pass


class ServerNotFoundError(HttpLib2Error): pass

# Open Items:
# -----------
# Proxy support

# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)

# Pluggable cache storage (supports storing the cache in
# flat files by default. We need a plug-in architecture
# that can support Berkeley DB and Squid)

# == Known Issues ==
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
# Does not handle Cache-Control: max-stale
# Does not use Age: headers when calculating cache freshness.


# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
156DEFAULT_MAX_REDIRECTS = 5 157 158# Which headers are hop-by-hop headers by default 159HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 160 'transfer-encoding', 'upgrade'] 161 162def _get_end2end_headers(response): 163 hopbyhop = list(HOP_BY_HOP) 164 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')]) 165 return [header for header in response.keys() if header not in hopbyhop] 166 167URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") 168 169def parse_uri(uri): 170 """Parses a URI using the regex given in Appendix B of RFC 3986. 171 172 (scheme, authority, path, query, fragment) = parse_uri(uri) 173 """ 174 groups = URI.match(uri).groups() 175 return groups[1], groups[3], groups[4], groups[6], groups[8] 176 177 178def urlnorm(uri): 179 (scheme, authority, path, query, fragment) = parse_uri(uri) 180 if not scheme or not authority: 181 raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) 182 authority = authority.lower() 183 scheme = scheme.lower() 184 if not path: 185 path = "/" 186 # Could do syntax based normalization of the URI before 187 # computing the digest. See Section 6.2.2 of Std 66. 188 request_uri = query and "?".join([path, query]) or path 189 scheme = scheme.lower() 190 defrag_uri = scheme + "://" + authority + request_uri 191 return scheme, authority, request_uri, defrag_uri 192 193 194# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) 195re_url_scheme = re.compile(r'^\w+://') 196re_slash = re.compile(r'[?/:|]+') 197 198def safename(filename): 199 """Return a filename suitable for the cache. 200 201 Strips dangerous and common characters to create a filename we 202 can use to store the cache in. 
203 """ 204 205 try: 206 if re_url_scheme.match(filename): 207 if isinstance(filename, str): 208 filename = filename.decode('utf-8') 209 filename = filename.encode('idna') 210 else: 211 filename = filename.encode('idna') 212 except UnicodeError: 213 pass 214 if isinstance(filename, unicode): 215 filename = filename.encode('utf-8') 216 filemd5 = md5.new(filename).hexdigest() 217 filename = re_url_scheme.sub("", filename) 218 filename = re_slash.sub(",", filename) 219 220 # limit length of filename 221 if len(filename) > 200: 222 filename = filename[:200] 223 return ",".join((filename, filemd5)) 224 225NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+') 226 227def _normalize_headers(headers): 228 return dict([(key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()]) 229 230 231def _parse_cache_control(headers): 232 retval = {} 233 if headers.has_key('cache-control'): 234 parts = headers['cache-control'].split(',') 235 parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")] 236 parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")] 237 retval = dict(parts_with_args + parts_wo_args) 238 return retval 239 240# Whether to use a strict mode to parse WWW-Authenticate headers 241# Might lead to bad results in case of ill-formed header value, 242# so disabled by default, falling back to relaxed parsing. 243# Set to true to turn on, usefull for testing servers. 244USE_WWW_AUTH_STRICT_PARSING = 0 245 246# In regex below: 247# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP 248# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" 
# In regex below:
#   [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
#   "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
#   \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(
    r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(
    r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
UNQUOTE_PAIRS = re.compile(r'\\(.)')

def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme."""
    retval = {}
    if headers.has_key(headername):
        authenticate = headers[headername].strip()
        www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
        while authenticate:
            # Break off the scheme at the beginning of the line
            if headername == 'authentication-info':
                # Authentication-Info carries Digest parameters with no
                # leading scheme token.
                (auth_scheme, the_rest) = ('digest', authenticate)
            else:
                (auth_scheme, the_rest) = authenticate.split(" ", 1)
            # Now loop over all the key value pairs that come after the scheme,
            # being careful not to roll into the next scheme
            match = www_auth.search(the_rest)
            auth_params = {}
            while match:
                if match and len(match.groups()) == 3:
                    (key, value, the_rest) = match.groups()
                    # Unescape quoted-pair sequences in quoted-string values.
                    auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value)
                match = www_auth.search(the_rest)
            retval[auth_scheme.lower()] = auth_params
            authenticate = the_rest.strip()
    return retval


def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Not that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of "FRESH", "STALE" or "TRANSPARENT".
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            # NOTE(review): this mutates the caller's headers dict so the
            # no-cache intent propagates to the actual request.
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # A max-age in the *request* overrides whatever the response allows.
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval


def _decompressContent(response, new_content):
    """Decompress *new_content* according to the Content-Encoding header.

    Supports gzip and deflate.  On success the content-length header is
    fixed up and content-encoding removed; on failure raises
    FailedToDecompressContent."""
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            response['content-length'] = str(len(content))
            del response['content-encoding']
    except IOError:
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding')
            , response, content)
    return content


def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) the response under *cachekey* in *cache*.

    Honors the no-store directive in either direction by deleting any
    existing entry.  Hop-by-hop-ish headers (status, content-encoding,
    transfer-encoding) are excluded from the cached header set."""
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status', 'content-encoding', 'transfer-encoding']:
                    info[key] = value

            # A 304 validates the cached entity, so the entry is re-stored
            # as a normal 200.
            status = response_headers.status
            if status == 304:
                status = 200

            # BUG FIX(review): the original formatted response_headers.status
            # here, ignoring the 304 -> 200 translation computed just above
            # (leaving `status` a dead variable) and caching entries with
            # "status: 304".
            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize bare CR / bare LF to CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
def _cnonce():
    """Return a random 16-character client nonce for Digest auth."""
    nonce = "%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])
    # BUG FIX(review): the module-level name `md5` is only bound when the
    # hashlib import at the top of the file fails (Python < 2.5); on newer
    # interpreters md5.new() raised a NameError.  Prefer hashlib and fall
    # back to the md5 module.
    try:
        dig = hashlib.md5(nonce).hexdigest()
    except NameError:
        dig = md5.new(nonce).hexdigest()
    return dig[:16]


def _wsse_username_token(cnonce, iso_now, password):
    """Return the base64-encoded PasswordDigest for WSSE UsernameToken auth."""
    to_digest = "%s%s%s" % (cnonce, iso_now, password)
    # BUG FIX(review): the `sha` module was never imported at module level,
    # so sha.new() raised a NameError here.  Use hashlib.sha1 when available
    # and fall back to importing the sha module on older interpreters.
    try:
        digest = hashlib.sha1(to_digest).digest()
    except NameError:
        import sha
        digest = sha.new(to_digest).digest()
    return base64.encodestring(digest).strip()


# For credentials we need two things, first
# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
# Then we also need a list of URIs that have already demanded authentication
# That list is tricky since sub-URIs can take the same auth, or the
# auth scheme may change as you descend the tree.
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.

class Authentication(object):
    """Base class: one authentication scheme scoped to one URI subtree."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http
        self.headers = headers
        self.content = content
        # BUG FIX(review): the original also did `self.response = response`.
        # That instance attribute shadowed the response() *method* defined
        # below, so any later auth.response(...) call raised a TypeError
        # ("... object is not callable").  Nothing in this module reads the
        # stored value, so the assignment is removed.

    def depth(self, request_uri):
        """How many path segments below this auth's scope the URI is."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Over-rise this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Over-rise this in sub-classes if necessary.

        Return TRUE is the request is to be retried, for
        example Digest may return stale=true.
        """
        return False


class BasicAuthentication(Authentication):
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip()


class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response)
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 per RFC 2617: username:realm:password
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers"""
        def H(x):
            # BUG FIX(review): md5.new is unbound on Python >= 2.5 (see
            # module imports); prefer hashlib with an md5-module fallback.
            try:
                return hashlib.md5(x).hexdigest()
            except NameError:
                return md5.new(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                                                                     '%08x' % self.challenge['nc'],
                                                                     self.challenge['cnonce'],
                                                                     self.challenge['qop'], H(A2)
                                                                     ))
        headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
            self.credentials[0],
            self.challenge['realm'],
            self.challenge['nonce'],
            request_uri,
            self.challenge['algorithm'],
            request_digest,
            self.challenge['qop'],
            self.challenge['nc'],
            self.challenge['cnonce'],
        )
        self.challenge['nc'] += 1

    def response(self, response, content):
        if not response.has_key('authentication-info'):
            challenge = _parse_www_authenticate(response).get('digest', {})
            if 'true' == challenge.get('stale'):
                # Server says our nonce is stale: adopt the new one and retry.
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response)
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        # Unknown reasons fall back to 'unauthorized'.
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        # NOTE(review): these rely on the module-level names `md5` and `sha`,
        # which the original import block only binds when hashlib is
        # unavailable — verify the module imports guarantee them.
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = md5
        else:
            self.hashmod = sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = md5
        else:
            self.pwhashmod = sha
        # key = H(username ":" H(password + salt) ":" realm), then hashed
        # once more; lower-cased hex at each step.
        self.key = "".join([self.credentials[0], ":",
                            self.pwhashmod.new(
                                "".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                            ":", self.challenge['realm']
                            ])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        # The digest covers every end-to-end header value, concatenated in
        # key order; the header *names* are reported in the "headers" param.
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
            self.credentials[0],
            self.challenge['realm'],
            self.challenge['snonce'],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        # Retry when the server reports an integrity failure or stale nonce.
        challenge = _parse_www_authenticate(response).get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False


class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
            self.credentials[0],
            password_digest,
            cnonce,
            iso_now)


class GoogleLoginAuthentication(Authentication):
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response)
        service = challenge['googlelogin'].get('service', 'xapi')
        # Bloggger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        # NOTE(review): this performs a live ClientLogin POST from the
        # constructor; requires headers['user-agent'] to be present.
        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST",
                                          body=urlencode(auth),
                                          headers={'Content-Type': 'application/x-www-form-urlencoded'})
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login refused: carry an empty token rather than failing here.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
body=urlencode(auth), 639 headers={'Content-Type': 'application/x-www-form-urlencoded'}) 640 lines = content.split('\n') 641 d = dict([tuple(line.split("=", 1)) for line in lines if line]) 642 if resp.status == 403: 643 self.Auth = "" 644 else: 645 self.Auth = d['Auth'] 646 647 def request(self, method, request_uri, headers, content): 648 """Modify the request headers to add the appropriate 649 Authorization header.""" 650 headers['authorization'] = 'GoogleLogin Auth=' + self.Auth 651 652 653AUTH_SCHEME_CLASSES = { 654 "basic": BasicAuthentication, 655 "wsse": WsseAuthentication, 656 "digest": DigestAuthentication, 657 "hmacdigest": HmacDigestAuthentication, 658 "googlelogin": GoogleLoginAuthentication 659} 660 661AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] 662 663def _md5(s): 664 return 665 666 667class FileCache(object): 668 """Uses a local directory as a store for cached files. 669 Not really safe to use if multiple threads or processes are going to 670 be running on the same cache. 
671 """ 672 673 def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior 674 self.cache = cache 675 self.safe = safe 676 if not os.path.exists(cache): 677 os.makedirs(self.cache) 678 679 def get(self, key): 680 retval = None 681 cacheFullPath = os.path.join(self.cache, self.safe(key)) 682 try: 683 f = file(cacheFullPath, "r") 684 retval = f.read() 685 f.close() 686 except IOError: 687 pass 688 return retval 689 690 def set(self, key, value): 691 cacheFullPath = os.path.join(self.cache, self.safe(key)) 692 f = file(cacheFullPath, "w") 693 f.write(value) 694 f.close() 695 696 def delete(self, key): 697 cacheFullPath = os.path.join(self.cache, self.safe(key)) 698 if os.path.exists(cacheFullPath): 699 os.remove(cacheFullPath) 700 701 702class Credentials(object): 703 def __init__(self): 704 self.credentials = [] 705 706 def add(self, name, password, domain=""): 707 self.credentials.append((domain.lower(), name, password)) 708 709 def clear(self): 710 self.credentials = [] 711 712 def iter(self, domain): 713 for (cdomain, name, password) in self.credentials: 714 if cdomain == "" or domain == cdomain: 715 yield (name, password) 716 717 718class KeyCerts(Credentials): 719 """Identical to Credentials except that 720 name/password are mapped to key/cert.""" 721 pass 722 723 724class ProxyInfo(object): 725 """Collect information required to use a proxy.""" 726 727 def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None): 728 """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX 729 constants. 
For example: 730 731 p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000) 732 """ 733 self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass 734 735 def astuple(self): 736 return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, 737 self.proxy_user, self.proxy_pass) 738 739 def isgood(self): 740 return socks and (self.proxy_host != None) and (self.proxy_port != None) 741 742 743class HTTPConnectionWithTimeout(httplib.HTTPConnection): 744 """HTTPConnection subclass that supports timeouts""" 745 746 def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): 747 httplib.HTTPConnection.__init__(self, host, port, strict) 748 self.timeout = timeout 749 self.proxy_info = proxy_info 750 751 def connect(self): 752 """Connect to the host and port specified in __init__.""" 753 # Mostly verbatim from httplib.py. 754 msg = "getaddrinfo returns an empty list" 755 for res in socket.getaddrinfo(self.host, self.port, 0, 756 socket.SOCK_STREAM): 757 af, socktype, proto, canonname, sa = res 758 try: 759 if self.proxy_info and self.proxy_info.isgood(): 760 self.sock = socks.socksocket(af, socktype, proto) 761 self.sock.setproxy(*self.proxy_info.astuple()) 762 else: 763 self.sock = socket.socket(af, socktype, proto) 764 # Different from httplib: support timeouts. 765 if self.timeout is not None: 766 self.sock.settimeout(self.timeout) 767 # End of difference from httplib. 
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    "This class allows communication via SSL."

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        self.timeout = timeout
        self.proxy_info = proxy_info
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                                         cert_file=cert_file, strict=strict)

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # BUG FIX(review): the original proxy branch called
        # self.sock.setproxy(...) before any socket had been created, and
        # then setproxy() again on a still-undefined local `sock`.  Create
        # the SOCKS socket first, mirroring
        # HTTPConnectionWithTimeout.connect above.
        if self.proxy_info and self.proxy_info.isgood():
            sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setproxy(*self.proxy_info.astuple())
        else:
            sock = socket.socket()
        if self.timeout is not None:
            sock.settimeout(self.timeout)
        sock.connect((self.host, self.port))
        ssl = socket.ssl(sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(sock, ssl)


class Http(object):
    """An HTTP client that handles:
- all methods
- caching
- ETags
- compression,
- HTTPS
- Basic
- Digest
- WSSE

and more.
    """

    def __init__(self, cache=None, timeout=None, proxy_info=None):
        """The value of proxy_info is a ProxyInfo instance.

If 'cache' is a string then it is used as a directory name
for a disk cache. Otherwise it must be an object that supports
the same interface as FileCache."""
        self.proxy_info = proxy_info
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, cached ETags are not sent as If-None-Match validators.
        self.ignore_etag = False

        # If True, exceptions are converted into error Response objects
        # instead of being raised.
        self.force_exception_to_status_code = False

        self.timeout = timeout
833 if cache and isinstance(cache, str): 834 self.cache = FileCache(cache) 835 else: 836 self.cache = cache 837 838 # Name/password 839 self.credentials = Credentials() 840 841 # Key/cert 842 self.certificates = KeyCerts() 843 844 # authorization objects 845 self.authorizations = [] 846 847 # If set to False then no redirects are followed, even safe ones. 848 self.follow_redirects = True 849 850 # If 'follow_redirects' is True, and this is set to True then 851 # all redirecs are followed, including unsafe ones. 852 self.follow_all_redirects = False 853 854 self.ignore_etag = False 855 856 self.force_exception_to_status_code = False 857 858 self.timeout = timeout 859 860 def _auth_from_challenge(self, host, request_uri, headers, response, content): 861 """A generator that creates Authorization objects 862 that can be applied to requests. 863 """ 864 challenges = _parse_www_authenticate(response) 865 for cred in self.credentials.iter(host): 866 for scheme in AUTH_SCHEME_ORDER: 867 if challenges.has_key(scheme): 868 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) 869 870 def add_credentials(self, name, password, domain=""): 871 """Add a name and password that will be used 872 any time a request requires authentication.""" 873 self.credentials.add(name, password, domain) 874 875 def add_certificate(self, key, cert, domain): 876 """Add a key and cert that will be used 877 any time a request requires authentication.""" 878 self.certificates.add(key, cert, domain) 879 880 def clear_credentials(self): 881 """Remove all the names and passwords 882 that are used for authentication""" 883 self.credentials.clear() 884 self.authorizations = [] 885 886 def _conn_request(self, conn, request_uri, method, body, headers): 887 for i in range(2): 888 try: 889 conn.request(method, request_uri, body, headers) 890 response = conn.getresponse() 891 except socket.gaierror: 892 conn.close() 893 raise ServerNotFoundError("Unable to find the server 
at %s" % conn.host) 894 except httplib.HTTPException, e: 895 if not i: 896 conn.close() 897 conn.connect() 898 continue 899 else: 900 raise 901 else: 902 content = response.read() 903 response = Response(response) 904 if method != "HEAD": 905 content = _decompressContent(response, content) 906 907 break 908 return response, content 909 910 911 def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey): 912 """Do the actual request using the connection object 913 and also follow one level of redirects if necessary""" 914 915 auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] 916 auth = auths and sorted(auths)[0][1] or None 917 if auth: 918 auth.request(method, request_uri, headers, body) 919 920 (response, content) = self._conn_request(conn, request_uri, method, body, headers) 921 922 if auth: 923 if auth.response(response, body): 924 auth.request(method, request_uri, headers, body) 925 (response, content) = self._conn_request(conn, request_uri, method, body, headers) 926 response._stale_digest = 1 927 928 if response.status == 401: 929 for authorization in self._auth_from_challenge(host, request_uri, headers, response, content): 930 authorization.request(method, request_uri, headers, body) 931 (response, content) = self._conn_request(conn, request_uri, method, body, headers, ) 932 if response.status != 401: 933 self.authorizations.append(authorization) 934 authorization.response(response, body) 935 break 936 937 if self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303: 938 if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: 939 # Pick out the location header and basically start from the beginning 940 # remembering first to strip the ETag header and decrement our 'depth' 941 if redirections: 942 if not response.has_key('location') and response.status != 300: 943 raise RedirectMissingLocation(_("Redirected but the 
response is missing a Location: header."), 944 response, content) 945 # Fix-up relative redirects (which violate an RFC 2616 MUST) 946 if response.has_key('location'): 947 location = response['location'] 948 (scheme, authority, path, query, fragment) = parse_uri(location) 949 if authority == None: 950 response['location'] = urlparse.urljoin(absolute_uri, location) 951 if response.status == 301 and method in ["GET", "HEAD"]: 952 response['-x-permanent-redirect-url'] = response['location'] 953 if not response.has_key('content-location'): 954 response['content-location'] = absolute_uri 955 _updateCache(headers, response, content, self.cache, cachekey) 956 if headers.has_key('if-none-match'): 957 del headers['if-none-match'] 958 if headers.has_key('if-modified-since'): 959 del headers['if-modified-since'] 960 if response.has_key('location'): 961 location = response['location'] 962 old_response = copy.deepcopy(response) 963 if not old_response.has_key('content-location'): 964 old_response['content-location'] = absolute_uri 965 redirect_method = ((response.status == 303) and ( 966 method not in ["GET", "HEAD"])) and "GET" or method 967 (response, content) = self.request(location, redirect_method, body=body, headers=headers, 968 redirections=redirections - 1) 969 response.previous = old_response 970 else: 971 raise RedirectLimit(_("Redirected more times than rediection_limit allows."), response, content) 972 elif response.status in [200, 203] and method == "GET": 973 # Don't cache 206's since we aren't going to handle byte range requests 974 if not response.has_key('content-location'): 975 response['content-location'] = absolute_uri 976 _updateCache(headers, response, content, self.cache, cachekey) 977 978 return response, content 979 980 981 # Need to catch and rebrand some exceptions 982 # Then need to optionally turn all exceptions into status codes 983 # including all socket.* and httplib.* exceptions. 
984 985 986 def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, 987 connection_type=None): 988 """ Performs a single HTTP request. 989The 'uri' is the URI of the HTTP resource and can begin 990with either 'http' or 'https'. The value of 'uri' must be an absolute URI. 991 992The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. 993There is no restriction on the methods allowed. 994 995The 'body' is the entity body to be sent with the request. It is a string 996object. 997 998Any extra headers that are to be sent with the request should be provided in the 999'headers' dictionary. 1000 1001The maximum number of redirect to follow before raising an 1002exception is 'redirections. The default is 5. 1003 1004The return value is a tuple of (response, content), the first 1005being and instance of the 'Response' class, the second being 1006a string that contains the response entity body. 1007 """ 1008 try: 1009 if headers is None: 1010 headers = {} 1011 else: 1012 headers = _normalize_headers(headers) 1013 1014 if not headers.has_key('user-agent'): 1015 headers['user-agent'] = "Python-httplib2/%s" % __version__ 1016 1017 uri = iri2uri(uri) 1018 1019 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) 1020 1021 conn_key = scheme + ":" + authority 1022 if conn_key in self.connections: 1023 conn = self.connections[conn_key] 1024 else: 1025 if not connection_type: 1026 connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout 1027 certs = list(self.certificates.iter(authority)) 1028 if scheme == 'https' and certs: 1029 conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0], 1030 cert_file=certs[0][1], timeout=self.timeout, 1031 proxy_info=self.proxy_info) 1032 else: 1033 conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, 1034 proxy_info=self.proxy_info) 1035 conn.set_debuglevel(debuglevel) 1036 1037 if 
method in ["GET", "HEAD"] and 'range' not in headers: 1038 headers['accept-encoding'] = 'compress, gzip' 1039 1040 info = email.Message.Message() 1041 cached_value = None 1042 if self.cache: 1043 cachekey = defrag_uri 1044 cached_value = self.cache.get(cachekey) 1045 if cached_value: 1046 info = email.message_from_string(cached_value) 1047 try: 1048 content = cached_value.split('\r\n\r\n', 1)[1] 1049 except IndexError: 1050 self.cache.delete(cachekey) 1051 cachekey = None 1052 cached_value = None 1053 else: 1054 cachekey = None 1055 1056 if method in ["PUT"] and self.cache and info.has_key( 1057 'etag') and not self.ignore_etag and 'if-match' not in headers: 1058 # http://www.w3.org/1999/04/Editing/ 1059 headers['if-match'] = info['etag'] 1060 1061 if method not in ["GET", "HEAD"] and self.cache and cachekey: 1062 # RFC 2616 Section 13.10 1063 self.cache.delete(cachekey) 1064 1065 if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: 1066 if info.has_key('-x-permanent-redirect-url'): 1067 # Should cached permanent redirects be counted in our redirection count? For now, yes. 1068 (response, new_content) = self.request(info['-x-permanent-redirect-url'], headers=headers, 1069 redirections=redirections - 1) 1070 response.previous = Response(info) 1071 response.previous.fromcache = True 1072 else: 1073 # Determine our course of action: 1074 # Is the cached entry fresh or stale? 1075 # Has the client requested a non-cached response? 1076 # 1077 # There seems to be three possible answers: 1078 # 1. [FRESH] Return the cache entry w/o doing a GET 1079 # 2. [STALE] Do the GET (but add in cache validators if available) 1080 # 3. 
[TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request 1081 entry_disposition = _entry_disposition(info, headers) 1082 1083 if entry_disposition == "FRESH": 1084 if not cached_value: 1085 info['status'] = '504' 1086 content = "" 1087 response = Response(info) 1088 if cached_value: 1089 response.fromcache = True 1090 return response, content 1091 1092 if entry_disposition == "STALE": 1093 if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers: 1094 headers['if-none-match'] = info['etag'] 1095 if info.has_key('last-modified') and not 'last-modified' in headers: 1096 headers['if-modified-since'] = info['last-modified'] 1097 elif entry_disposition == "TRANSPARENT": 1098 pass 1099 1100 (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, 1101 redirections, cachekey) 1102 1103 if response.status == 304 and method == "GET": 1104 # Rewrite the cache entry with the new end-to-end headers 1105 # Take all headers that are in response 1106 # and overwrite their values in info. 1107 # unless they are hop-by-hop, or are listed in the connection header. 
1108 1109 for key in _get_end2end_headers(response): 1110 info[key] = response[key] 1111 merged_response = Response(info) 1112 if hasattr(response, "_stale_digest"): 1113 merged_response._stale_digest = response._stale_digest 1114 _updateCache(headers, merged_response, content, self.cache, cachekey) 1115 response = merged_response 1116 response.status = 200 1117 response.fromcache = True 1118 1119 elif response.status == 200: 1120 content = new_content 1121 else: 1122 self.cache.delete(cachekey) 1123 content = new_content 1124 else: 1125 (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, 1126 redirections, cachekey) 1127 except Exception, e: 1128 if self.force_exception_to_status_code: 1129 if isinstance(e, HttpLib2ErrorWithResponse): 1130 response = e.response 1131 content = e.content 1132 response.status = 500 1133 response.reason = str(e) 1134 elif isinstance(e, socket.timeout): 1135 content = "Request Timeout" 1136 response = Response({ 1137 "content-type": "text/plain", 1138 "status": "408", 1139 "content-length": len(content) 1140 }) 1141 response.reason = "Request Timeout" 1142 else: 1143 content = str(e) 1144 response = Response({ 1145 "content-type": "text/plain", 1146 "status": "400", 1147 "content-length": len(content) 1148 }) 1149 response.reason = "Bad Request" 1150 else: 1151 raise 1152 1153 return response, content 1154 1155 1156class Response(dict): 1157 """An object more like email.Message than httplib.HTTPResponse.""" 1158 1159 """Is this response from our local cache""" 1160 fromcache = False 1161 1162 """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """ 1163 version = 11 1164 1165 "Status code returned by server. " 1166 status = 200 1167 1168 """Reason phrase returned by server.""" 1169 reason = "Ok" 1170 1171 previous = None 1172 1173 def __init__(self, info): 1174 # info is either an email.Message or 1175 # an httplib.HTTPResponse object. 
1176 if isinstance(info, httplib.HTTPResponse): 1177 for key, value in info.getheaders(): 1178 self[key] = value 1179 self.status = info.status 1180 self['status'] = str(self.status) 1181 self.reason = info.reason 1182 self.version = info.version 1183 elif isinstance(info, email.Message.Message): 1184 for key, value in info.items(): 1185 self[key] = value 1186 self.status = int(self['status']) 1187 else: 1188 for key, value in info.iteritems(): 1189 self[key] = value 1190 self.status = int(self.get('status', self.status)) 1191 1192 1193 def __getattr__(self, name): 1194 if name == 'dict': 1195 return self 1196 else: 1197 raise AttributeError, name 1198