1from __future__ import absolute_import
2from contextlib import contextmanager
3import zlib
4import io
5from socket import timeout as SocketTimeout
6from socket import error as SocketError
7
8from ._collections import HTTPHeaderDict
9from .exceptions import (
10    ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
11)
12from .packages.six import string_types as basestring, binary_type, PY3
13from .packages.six.moves import http_client as httplib
14from .connection import HTTPException, BaseSSLError
15from .util.response import is_fp_closed, is_response_to_head
16
17
class DeflateDecoder(object):
    """
    Decoder for bodies sent with ``Content-Encoding: deflate``.

    The decoder first assumes a zlib-wrapped stream.  If zlib rejects the
    input, it switches to a raw deflate decompressor (negative window bits)
    and replays every byte received so far.

    Attributes that are not defined on this class are looked up on the
    underlying zlib decompression object (e.g. ``flush``).
    """

    def __init__(self):
        # True while it is still undecided which of the two framings is used.
        self._first_try = True
        # Input seen so far, kept only so it can be replayed on fallback.
        self._data = b''
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        return getattr(self._obj, name)

    def decompress(self, data):
        """
        Decompress ``data`` and return whatever output is available.

        :param data:
            The next piece of the compressed body. Empty input is returned
            unchanged.

        :returns: The decompressed bytes produced by this piece of input.

        :raises zlib.error: If the data cannot be decoded in either mode.
        """
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # Not a zlib-wrapped stream: start over in raw deflate mode and
            # feed it everything buffered so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None

        if decompressed:
            # zlib accepted the stream and produced output, so the fallback
            # is no longer needed. Stop buffering; otherwise the whole
            # compressed body would be held in memory for no reason.
            self._first_try = False
            self._data = None

        return decompressed
45
46
class GzipDecoder(object):
    """
    Decoder for bodies sent with ``Content-Encoding: gzip``.

    Thin wrapper around a zlib decompression object configured with
    ``16 + zlib.MAX_WBITS`` window bits. Attributes not defined here are
    looked up on that underlying object.
    """

    def __init__(self):
        gzip_wbits = 16 + zlib.MAX_WBITS
        self._obj = zlib.decompressobj(gzip_wbits)

    def __getattr__(self, name):
        inner = self._obj
        return getattr(inner, name)

    def decompress(self, data):
        """
        Decompress ``data``; empty input is handed back untouched.
        """
        if data:
            return self._obj.decompress(data)
        return data
59
60
def _get_decoder(mode):
    """
    Return a new decoder instance for ``mode``: a :class:`GzipDecoder` when
    ``mode`` is ``'gzip'``, a :class:`DeflateDecoder` for anything else.
    """
    decoder_cls = GzipDecoder if mode == 'gzip' else DeflateDecoder
    return decoder_cls()
66
67
68class HTTPResponse(io.IOBase):
69    """
70    HTTP Response container.
71
72    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
73    loaded and decoded on-demand when the ``data`` property is accessed.  This
74    class is also compatible with the Python standard library's :mod:`io`
75    module, and can hence be treated as a readable object in the context of that
76    framework.
77
78    Extra parameters for behaviour not present in httplib.HTTPResponse:
79
80    :param preload_content:
81        If True, the response's body will be preloaded during construction.
82
    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header (like 'gzip' and 'deflate'). If False, the
        raw data will be used instead.
87
88    :param original_response:
89        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
90        object, it's convenient to include the original for debug purposes. It's
91        otherwise unused.
92    """
93
94    CONTENT_DECODERS = ['gzip', 'deflate']
95    REDIRECT_STATUSES = [301, 302, 303, 307, 308]
96
97    def __init__(self, body='', headers=None, status=0, version=0, reason=None,
98                 strict=0, preload_content=True, decode_content=True,
99                 original_response=None, pool=None, connection=None):
100
101        if isinstance(headers, HTTPHeaderDict):
102            self.headers = headers
103        else:
104            self.headers = HTTPHeaderDict(headers)
105        self.status = status
106        self.version = version
107        self.reason = reason
108        self.strict = strict
109        self.decode_content = decode_content
110
111        self._decoder = None
112        self._body = None
113        self._fp = None
114        self._original_response = original_response
115        self._fp_bytes_read = 0
116
117        if body and isinstance(body, (basestring, binary_type)):
118            self._body = body
119
120        self._pool = pool
121        self._connection = connection
122
123        if hasattr(body, 'read'):
124            self._fp = body
125
126        # Are we using the chunked-style of transfer encoding?
127        self.chunked = False
128        self.chunk_left = None
129        tr_enc = self.headers.get('transfer-encoding', '').lower()
130        # Don't incur the penalty of creating a list and then discarding it
131        encodings = (enc.strip() for enc in tr_enc.split(","))
132        if "chunked" in encodings:
133            self.chunked = True
134
135        # If requested, preload the body.
136        if preload_content and not self._body:
137            self._body = self.read(decode_content=decode_content)
138
139    def get_redirect_location(self):
140        """
141        Should we redirect and where to?
142
143        :returns: Truthy redirect location string if we got a redirect status
144            code and valid location. ``None`` if redirect status and no
145            location. ``False`` if not a redirect status code.
146        """
147        if self.status in self.REDIRECT_STATUSES:
148            return self.headers.get('location')
149
150        return False
151
152    def release_conn(self):
153        if not self._pool or not self._connection:
154            return
155
156        self._pool._put_conn(self._connection)
157        self._connection = None
158
159    @property
160    def data(self):
161        # For backwords-compat with earlier urllib3 0.4 and earlier.
162        if self._body:
163            return self._body
164
165        if self._fp:
166            return self.read(cache_content=True)
167
168    def tell(self):
169        """
170        Obtain the number of bytes pulled over the wire so far. May differ from
171        the amount of content returned by :meth:``HTTPResponse.read`` if bytes
172        are encoded on the wire (e.g, compressed).
173        """
174        return self._fp_bytes_read
175
176    def _init_decoder(self):
177        """
178        Set-up the _decoder attribute if necessar.
179        """
180        # Note: content-encoding value should be case-insensitive, per RFC 7230
181        # Section 3.2
182        content_encoding = self.headers.get('content-encoding', '').lower()
183        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
184            self._decoder = _get_decoder(content_encoding)
185
186    def _decode(self, data, decode_content, flush_decoder):
187        """
188        Decode the data passed in and potentially flush the decoder.
189        """
190        try:
191            if decode_content and self._decoder:
192                data = self._decoder.decompress(data)
193        except (IOError, zlib.error) as e:
194            content_encoding = self.headers.get('content-encoding', '').lower()
195            raise DecodeError(
196                "Received response with content-encoding: %s, but "
197                "failed to decode it." % content_encoding, e)
198
199        if flush_decoder and decode_content:
200            data += self._flush_decoder()
201
202        return data
203
204    def _flush_decoder(self):
205        """
206        Flushes the decoder. Should only be called if the decoder is actually
207        being used.
208        """
209        if self._decoder:
210            buf = self._decoder.decompress(b'')
211            return buf + self._decoder.flush()
212
213        return b''
214
    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        Translation performed on exceptions raised inside the ``with`` body:

        * ``SocketTimeout`` is re-raised as :class:`ReadTimeoutError`.
        * ``BaseSSLError`` is re-raised as :class:`ReadTimeoutError` when its
          message contains 'read operation timed out'; any other SSL error
          propagates unchanged.
        * ``HTTPException`` and ``SocketError`` are re-raised as
          :class:`ProtocolError`.

        Whenever an ``Exception`` leaves the body (translated or not), the
        original response is closed if it is still open and the connection is
        closed if one is held. On every exit, successful or not, the
        connection is released back to the pool if the original response is
        closed.
        """
        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, 'Read timed out.')

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if 'read operation timed out' not in str(e):  # Defensive:
                    # This shouldn't happen but just in case we're missing an edge
                    # case, let's avoid swallowing SSL errors.
                    raise

                raise ReadTimeoutError(self._pool, None, 'Read timed out.')

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError('Connection broken: %r' % e, e)

        # The outer handler also sees the exceptions raised by the inner
        # handlers above, so cleanup runs for translated errors too.
        except Exception:
            # The response may not be closed but we're not going to use it anymore
            # so close it now to ensure that the connection is released back to the pool.
            if self._original_response and not self._original_response.isclosed():
                self._original_response.close()

            # Closing the response may not actually be sufficient to close
            # everything, so if we have a hold of the connection close that
            # too.
            if self._connection is not None:
                self._connection.close()

            raise
        finally:
            # Only give the connection back once the original response is
            # closed, i.e. nothing more will be read from it.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
262
263    def read(self, amt=None, decode_content=None, cache_content=False):
264        """
265        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
266        parameters: ``decode_content`` and ``cache_content``.
267
268        :param amt:
269            How much of the content to read. If specified, caching is skipped
270            because it doesn't make sense to cache partial content as the full
271            response.
272
273        :param decode_content:
274            If True, will attempt to decode the body based on the
275            'content-encoding' header.
276
277        :param cache_content:
278            If True, will save the returned data such that the same result is
279            returned despite of the state of the underlying file object. This
280            is useful if you want the ``.data`` property to continue working
281            after having ``.read()`` the file object. (Overridden if ``amt`` is
282            set.)
283        """
284        self._init_decoder()
285        if decode_content is None:
286            decode_content = self.decode_content
287
288        if self._fp is None:
289            return
290
291        flush_decoder = False
292        data = None
293
294        with self._error_catcher():
295            if amt is None:
296                # cStringIO doesn't like amt=None
297                data = self._fp.read()
298                flush_decoder = True
299            else:
300                cache_content = False
301                data = self._fp.read(amt)
302                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
303                    # Close the connection when no data is returned
304                    #
305                    # This is redundant to what httplib/http.client _should_
306                    # already do.  However, versions of python released before
307                    # December 15, 2012 (http://bugs.python.org/issue16298) do
308                    # not properly close the connection in all cases. There is
309                    # no harm in redundantly calling close.
310                    self._fp.close()
311                    flush_decoder = True
312
313        if data:
314            self._fp_bytes_read += len(data)
315
316            data = self._decode(data, decode_content, flush_decoder)
317
318            if cache_content:
319                self._body = data
320
321        return data
322
323    def stream(self, amt=2**16, decode_content=None):
324        """
325        A generator wrapper for the read() method. A call will block until
326        ``amt`` bytes have been read from the connection or until the
327        connection is closed.
328
329        :param amt:
330            How much of the content to read. The generator will return up to
331            much data per iteration, but may return less. This is particularly
332            likely when using compressed data. However, the empty string will
333            never be returned.
334
335        :param decode_content:
336            If True, will attempt to decode the body based on the
337            'content-encoding' header.
338        """
339        if self.chunked:
340            for line in self.read_chunked(amt, decode_content=decode_content):
341                yield line
342        else:
343            while not is_fp_closed(self._fp):
344                data = self.read(amt=amt, decode_content=decode_content)
345
346                if data:
347                    yield data
348
349    @classmethod
350    def from_httplib(ResponseCls, r, **response_kw):
351        """
352        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
353        corresponding :class:`urllib3.response.HTTPResponse` object.
354
355        Remaining parameters are passed to the HTTPResponse constructor, along
356        with ``original_response=r``.
357        """
358        headers = r.msg
359
360        if not isinstance(headers, HTTPHeaderDict):
361            if PY3:  # Python 3
362                headers = HTTPHeaderDict(headers.items())
363            else:  # Python 2
364                headers = HTTPHeaderDict.from_httplib(headers)
365
366        # HTTPResponse objects in Python 3 don't have a .strict attribute
367        strict = getattr(r, 'strict', 0)
368        resp = ResponseCls(body=r,
369                           headers=headers,
370                           status=r.status,
371                           version=r.version,
372                           reason=r.reason,
373                           strict=strict,
374                           original_response=r,
375                           **response_kw)
376        return resp
377
378    # Backwards-compatibility methods for httplib.HTTPResponse
379    def getheaders(self):
380        return self.headers
381
382    def getheader(self, name, default=None):
383        return self.headers.get(name, default)
384
385    # Overrides from io.IOBase
386    def close(self):
387        if not self.closed:
388            self._fp.close()
389
390    @property
391    def closed(self):
392        if self._fp is None:
393            return True
394        elif hasattr(self._fp, 'closed'):
395            return self._fp.closed
396        elif hasattr(self._fp, 'isclosed'):  # Python 2
397            return self._fp.isclosed()
398        else:
399            return True
400
401    def fileno(self):
402        if self._fp is None:
403            raise IOError("HTTPResponse has no file to get a fileno from")
404        elif hasattr(self._fp, "fileno"):
405            return self._fp.fileno()
406        else:
407            raise IOError("The file-like object this HTTPResponse is wrapped "
408                          "around has no file descriptor")
409
410    def flush(self):
411        if self._fp is not None and hasattr(self._fp, 'flush'):
412            return self._fp.flush()
413
414    def readable(self):
415        # This method is required for `io` module compatibility.
416        return True
417
418    def readinto(self, b):
419        # This method is required for `io` module compatibility.
420        temp = self.read(len(b))
421        if len(temp) == 0:
422            return 0
423        else:
424            b[:len(temp)] = temp
425            return len(temp)
426
427    def _update_chunk_length(self):
428        # First, we'll figure out length of a chunk and then
429        # we'll try to read it from socket.
430        if self.chunk_left is not None:
431            return
432        line = self._fp.fp.readline()
433        line = line.split(b';', 1)[0]
434        try:
435            self.chunk_left = int(line, 16)
436        except ValueError:
437            # Invalid chunked protocol response, abort.
438            self.close()
439            raise httplib.IncompleteRead(line)
440
441    def _handle_chunk(self, amt):
442        returned_chunk = None
443        if amt is None:
444            chunk = self._fp._safe_read(self.chunk_left)
445            returned_chunk = chunk
446            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
447            self.chunk_left = None
448        elif amt < self.chunk_left:
449            value = self._fp._safe_read(amt)
450            self.chunk_left = self.chunk_left - amt
451            returned_chunk = value
452        elif amt == self.chunk_left:
453            value = self._fp._safe_read(amt)
454            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
455            self.chunk_left = None
456            returned_chunk = value
457        else:  # amt > self.chunk_left
458            returned_chunk = self._fp._safe_read(self.chunk_left)
459            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
460            self.chunk_left = None
461        return returned_chunk
462
463    def read_chunked(self, amt=None, decode_content=None):
464        """
465        Similar to :meth:`HTTPResponse.read`, but with an additional
466        parameter: ``decode_content``.
467
468        :param decode_content:
469            If True, will attempt to decode the body based on the
470            'content-encoding' header.
471        """
472        self._init_decoder()
473        # FIXME: Rewrite this method and make it a class with a better structured logic.
474        if not self.chunked:
475            raise ResponseNotChunked(
476                "Response is not chunked. "
477                "Header 'transfer-encoding: chunked' is missing.")
478
479        # Don't bother reading the body of a HEAD request.
480        if self._original_response and is_response_to_head(self._original_response):
481            self._original_response.close()
482            return
483
484        with self._error_catcher():
485            while True:
486                self._update_chunk_length()
487                if self.chunk_left == 0:
488                    break
489                chunk = self._handle_chunk(amt)
490                decoded = self._decode(chunk, decode_content=decode_content,
491                                       flush_decoder=False)
492                if decoded:
493                    yield decoded
494
495            if decode_content:
496                # On CPython and PyPy, we should never need to flush the
497                # decoder. However, on Jython we *might* need to, so
498                # lets defensively do it anyway.
499                decoded = self._flush_decoder()
500                if decoded:  # Platform-specific: Jython.
501                    yield decoded
502
503            # Chunk content ends with \r\n: discard it.
504            while True:
505                line = self._fp.fp.readline()
506                if not line:
507                    # Some sites may not end with '\r\n'.
508                    break
509                if line == b'\r\n':
510                    break
511
512            # We read everything; close the "file".
513            if self._original_response:
514                self._original_response.close()
515