summaryrefslogtreecommitdiff
path: root/mercurial/httpclient
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 14:49:51 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 14:49:51 +0100
commita498da43c7fdb9f24b73680c02a4a3588cc62d9a (patch)
treedaf8119dae1749b5165b68033a1b23a7375ce9ce /mercurial/httpclient
downloadmercurial-tarball-a498da43c7fdb9f24b73680c02a4a3588cc62d9a.tar.gz
Tarball conversion
Diffstat (limited to 'mercurial/httpclient')
-rw-r--r--mercurial/httpclient/__init__.py674
-rw-r--r--mercurial/httpclient/_readers.py195
-rw-r--r--mercurial/httpclient/socketutil.py127
3 files changed, 996 insertions, 0 deletions
diff --git a/mercurial/httpclient/__init__.py b/mercurial/httpclient/__init__.py
new file mode 100644
index 0000000..f5c3baf
--- /dev/null
+++ b/mercurial/httpclient/__init__.py
@@ -0,0 +1,674 @@
+# Copyright 2010, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""Improved HTTP/1.1 client library
+
+This library contains an HTTPConnection which is similar to the one in
+httplib, but has several additional features:
+
+ * supports keepalives natively
+ * uses select() to block for incoming data
+ * notices when the server responds early to a request
+ * implements ssl inline instead of in a different class
+"""
+
+import cStringIO
+import errno
+import httplib
+import logging
+import rfc822
+import select
+import socket
+
+import _readers
+import socketutil
+
+logger = logging.getLogger(__name__)
+
+__all__ = ['HTTPConnection', 'HTTPResponse']
+
+HTTP_VER_1_0 = 'HTTP/1.0'
+HTTP_VER_1_1 = 'HTTP/1.1'
+
+OUTGOING_BUFFER_SIZE = 1 << 15
+INCOMING_BUFFER_SIZE = 1 << 20
+
+HDR_ACCEPT_ENCODING = 'accept-encoding'
+HDR_CONNECTION_CTRL = 'connection'
+HDR_CONTENT_LENGTH = 'content-length'
+HDR_XFER_ENCODING = 'transfer-encoding'
+
+XFER_ENCODING_CHUNKED = 'chunked'
+
+CONNECTION_CLOSE = 'close'
+
+EOL = '\r\n'
+_END_HEADERS = EOL * 2
+
+# Based on some searching around, 1 second seems like a reasonable
+# default here.
+TIMEOUT_ASSUME_CONTINUE = 1
+TIMEOUT_DEFAULT = None
+
+
+class HTTPResponse(object):
+ """Response from an HTTP server.
+
+ The response will continue to load as available. If you need the
+ complete response before continuing, check the .complete() method.
+ """
def __init__(self, sock, timeout, method):
    """Create a response that will be read from *sock*.

    Args:
      sock: the (non-blocking) socket the response arrives on.
      timeout: seconds _select() waits for data before raising
        HTTPTimeoutException.
      method: the HTTP method of the originating request; needed
        because some methods (HEAD, CONNECT) imply a bodiless
        response.
    """
    self.sock = sock
    self.method = method
    # Accumulates raw bytes until the header block is complete; the
    # remainder is then handed to a body reader.
    self.raw_response = ''
    self._headers_len = 0
    self.headers = None        # rfc822.Message once headers are parsed
    self.will_close = False    # True if the server will close the socket
    self.status_line = ''
    self.status = None         # int status code once parsed
    self.continued = False     # True once a "100 Continue" was seen
    self.http_version = None
    self.reason = None
    self._reader = None        # body reader chosen in _load_response

    self._read_location = 0
    # EOL convention used by the server; downgraded to a bare '\n'
    # or '\r' in _load_response if a noncompliant server is detected.
    self._eol = EOL

    self._timeout = timeout
+
@property
def _end_headers(self):
    # The header/body separator is a blank line: two EOLs in
    # whatever EOL convention the server is using.
    return self._eol * 2
+
def complete(self):
    """Returns true if this response is completely loaded.

    Note that if this is a connection where complete means the
    socket is closed, this will nearly always return False, even
    in cases where all the data has actually been loaded.
    """
    # NOTE(review): before the headers are parsed there is no reader
    # yet, so this implicitly returns None (falsy) in that state.
    if self._reader:
        return self._reader.done()
+
def _close(self):
    # Tell the body reader the socket is gone; readers raise
    # HTTPRemoteClosedError if that is an invalid end state.
    if self._reader is not None:
        self._reader._close()
+
def readline(self):
    """Read a single line from the response body.

    This may block until either a line ending is found or the
    response is complete.
    """
    # TODO: move this into the reader interface where it can be
    # smarter (and probably avoid copies)
    bytes = []
    # Get the first byte, blocking via _select() until the reader
    # has data.
    while not bytes:
        try:
            bytes = [self._reader.read(1)]
        except _readers.ReadNotReady:
            self._select()
    # Keep pulling one byte at a time until a newline arrives or the
    # response finishes.
    while bytes[-1] != '\n' and not self.complete():
        self._select()
        bytes.append(self._reader.read(1))
    # The response may have completed with more buffered data still
    # available; drain up to and including the next newline.
    if bytes[-1] != '\n':
        next = self._reader.read(1)
        while next and next != '\n':
            bytes.append(next)
            next = self._reader.read(1)
        bytes.append(next)
    return ''.join(bytes)
+
def read(self, length=None):
    """Read *length* bytes of body, or the rest if length is None.

    Blocks (via _select) until enough data is available.
    NOTE(review): length=0 is treated like None (unbounded) because
    only truthiness is checked below — confirm callers never pass 0.
    """
    # if length is None, unbounded read
    while (not self.complete()  # never select on a finished read
           and (not length  # unbounded, so we wait for complete()
                or length > self._reader.available_data)):
        self._select()
    if not length:
        length = self._reader.available_data
    r = self._reader.read(length)
    # Once the body is fully read on a close-signalled connection,
    # release the socket.
    if self.complete() and self.will_close:
        self.sock.close()
    return r
+
def _select(self):
    """Block for incoming data and feed it to the response parser.

    Returns True if data was consumed (or an SSL retry is needed),
    False if the socket was closed by the peer. Raises
    HTTPTimeoutException when the timeout expires while the
    response is still incomplete.
    """
    r, _, _ = select.select([self.sock], [], [], self._timeout)
    if not r:
        # socket was not readable. If the response is not
        # complete, raise a timeout.
        if not self.complete():
            logger.info('timed out with timeout of %s', self._timeout)
            raise HTTPTimeoutException('timeout reading data')
    try:
        data = self.sock.recv(INCOMING_BUFFER_SIZE)
    except socket.sslerror, e:
        # Pre-2.6 SSL sockets signal "no decrypted data yet" with
        # SSL_ERROR_WANT_READ; anything else is a real error.
        if e.args[0] != socket.SSL_ERROR_WANT_READ:
            raise
        logger.debug('SSL_WANT_READ in _select, should retry later')
        return True
    logger.debug('response read %d data during _select', len(data))
    # If the socket was readable and no data was read, that means
    # the socket was closed. Inform the reader (if any) so it can
    # raise an exception if this is an invalid situation.
    if not data:
        if self._reader:
            self._reader._close()
        return False
    else:
        self._load_response(data)
        return True
+
def _load_response(self, data):
    """Accumulate *data* and parse the status line and headers.

    Once headers are complete this picks a body reader and rebinds
    self._load_response to the reader's _load, so later data goes
    straight to the body.
    """
    # Being here implies we're not at the end of the headers yet,
    # since at the end of this method if headers were completely
    # loaded we replace this method with the load() method of the
    # reader we created.
    self.raw_response += data
    # This is a bogus server with bad line endings
    if self._eol not in self.raw_response:
        for bad_eol in ('\n', '\r'):
            if (bad_eol in self.raw_response
                # verify that bad_eol is not the end of the incoming data
                # as this could be a response line that just got
                # split between \r and \n.
                and (self.raw_response.index(bad_eol) <
                     (len(self.raw_response) - 1))):
                logger.info('bogus line endings detected, '
                            'using %r for EOL', bad_eol)
                self._eol = bad_eol
                break
    # exit early if not at end of headers
    if self._end_headers not in self.raw_response or self.headers:
        return

    # handle 100-continue response
    hdrs, body = self.raw_response.split(self._end_headers, 1)
    http_ver, status = hdrs.split(' ', 1)
    if status.startswith('100'):
        # Discard the interim response; the real one follows in the
        # same stream.
        self.raw_response = body
        self.continued = True
        logger.debug('continue seen, setting body to %r', body)
        return

    # arriving here means we should parse response headers
    # as all headers have arrived completely
    hdrs, body = self.raw_response.split(self._end_headers, 1)
    del self.raw_response
    if self._eol in hdrs:
        self.status_line, hdrs = hdrs.split(self._eol, 1)
    else:
        self.status_line = hdrs
        hdrs = ''
    # TODO HTTP < 1.0 support
    (self.http_version, self.status,
     self.reason) = self.status_line.split(' ', 2)
    self.status = int(self.status)
    # rfc822 expects CRLF; normalize if we downgraded the EOL.
    if self._eol != EOL:
        hdrs = hdrs.replace(self._eol, '\r\n')
    headers = rfc822.Message(cStringIO.StringIO(hdrs))
    content_len = None
    if HDR_CONTENT_LENGTH in headers:
        content_len = int(headers[HDR_CONTENT_LENGTH])
    if self.http_version == HTTP_VER_1_0:
        self.will_close = True
    elif HDR_CONNECTION_CTRL in headers:
        self.will_close = (
            headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
    # Reader selection: chunked > content-length > close-is-end.
    if (HDR_XFER_ENCODING in headers
        and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
        self._reader = _readers.ChunkedReader(self._eol)
        logger.debug('using a chunked reader')
    else:
        # HEAD responses are forbidden from returning a body, and
        # it's implausible for a CONNECT response to use
        # close-is-end logic for an OK response.
        if (self.method == 'HEAD' or
            (self.method == 'CONNECT' and content_len is None)):
            content_len = 0
        if content_len is not None:
            logger.debug('using a content-length reader with length %d',
                         content_len)
            self._reader = _readers.ContentLengthReader(content_len)
        else:
            # Response body had no length specified and is not
            # chunked, so the end of the body will only be
            # identifiable by the termination of the socket by the
            # server. My interpretation of the spec means that we
            # are correct in hitting this case if
            # transfer-encoding, content-length, and
            # connection-control were left unspecified.
            self._reader = _readers.CloseIsEndReader()
            logger.debug('using a close-is-end reader')
            self.will_close = True

    if body:
        self._reader._load(body)
    logger.debug('headers complete')
    self.headers = headers
    self._load_response = self._reader._load
+
+
+class HTTPConnection(object):
+ """Connection to a single http server.
+
+ Supports 100-continue and keepalives natively. Uses select() for
+ non-blocking socket operations.
+ """
+ http_version = HTTP_VER_1_1
+ response_class = HTTPResponse
+
def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
             timeout=TIMEOUT_DEFAULT,
             continue_timeout=TIMEOUT_ASSUME_CONTINUE,
             proxy_hostport=None, **ssl_opts):
    """Create a new HTTPConnection.

    Args:
      host: The host to which we'll connect.
      port: Optional. The port over which we'll connect. Default 80 for
            non-ssl, 443 for ssl.
      use_ssl: Optional. Whether to use ssl. Defaults to False if port
               is not 443, true if port is 443.
      ssl_validator: a function(socket) to validate the ssl cert
      timeout: Optional. Connection timeout, default is
               TIMEOUT_DEFAULT.
      continue_timeout: Optional. Timeout for waiting on an expected
           "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
      proxy_hostport: Optional. Tuple of (host, port) to use as an http
                      proxy for the connection. Default is to not use a
                      proxy.
    """
    # Split a "host:port" (or bracketed "[v6addr]:port") string.
    # NOTE(review): operator precedence makes this
    # (port is None and host.count(':') == 1) or (']:' in host),
    # so an explicit *port* argument is overridden for bracketed
    # IPv6 hosts carrying a ':port' suffix — confirm intended.
    if port is None and host.count(':') == 1 or ']:' in host:
        host, port = host.rsplit(':', 1)
        port = int(port)
        if '[' in host:
            host = host[1:-1]
    if use_ssl is None and port is None:
        use_ssl = False
        port = 80
    elif use_ssl is None:
        use_ssl = (port == 443)
    elif port is None:
        port = (use_ssl and 443 or 80)
    self.port = port
    if use_ssl and not socketutil.have_ssl:
        raise Exception('ssl requested but unavailable on this Python')
    self.ssl = use_ssl
    self.ssl_opts = ssl_opts
    self._ssl_validator = ssl_validator
    self.host = host
    self.sock = None
    self._current_response = None
    self._current_response_taken = False
    if proxy_hostport is None:
        self._proxy_host = self._proxy_port = None
    else:
        self._proxy_host, self._proxy_port = proxy_hostport

    self.timeout = timeout
    self.continue_timeout = continue_timeout
+
def _connect(self):
    """Connect to the host and port specified in __init__.

    No-op when a socket already exists. Handles connecting through
    an http proxy (issuing CONNECT first when ssl is in use) and
    wrapping the socket for ssl. Leaves self.sock non-blocking.
    """
    if self.sock:
        return
    if self._proxy_host is not None:
        logger.info('Connecting to http proxy %s:%s',
                    self._proxy_host, self._proxy_port)
        sock = socketutil.create_connection((self._proxy_host,
                                             self._proxy_port))
        if self.ssl:
            # TODO proxy header support
            data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
                                                           self.port),
                                     {}, HTTP_VER_1_0)
            sock.send(data)
            sock.setblocking(0)
            r = self.response_class(sock, self.timeout, 'CONNECT')
            timeout_exc = HTTPTimeoutException(
                'Timed out waiting for CONNECT response from proxy')
            while not r.complete():
                try:
                    if not r._select():
                        if not r.complete():
                            raise timeout_exc
                except HTTPTimeoutException:
                    # This raise/except pattern looks goofy, but
                    # _select can raise the timeout as well as the
                    # loop body. I wish it wasn't this convoluted,
                    # but I don't have a better solution
                    # immediately handy.
                    raise timeout_exc
            if r.status != 200:
                raise HTTPProxyConnectFailedException(
                    'Proxy connection failed: %d %s' % (r.status,
                                                        r.read()))
            logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
                        self.host, self.port)
    else:
        sock = socketutil.create_connection((self.host, self.port))
    if self.ssl:
        # This is the default, but in the case of proxied SSL
        # requests the proxy logic above will have cleared
        # blocking mode, so reenable it just to be safe.
        sock.setblocking(1)
        logger.debug('wrapping socket for ssl with options %r',
                     self.ssl_opts)
        sock = socketutil.wrap_socket(sock, **self.ssl_opts)
        if self._ssl_validator:
            self._ssl_validator(sock)
    sock.setblocking(0)
    self.sock = sock
+
def buildheaders(self, method, path, headers, http_ver):
    """Serialize the request line and *headers* into one string.

    Args:
      method: HTTP method for the request line, e.g. 'GET'.
      path: request path; rewritten to a full URI when talking
        through a plain http proxy.
      headers: dict mapping lowercased header name to a
        (Name, value) tuple. Mutated: Host and Accept-Encoding
        are injected.
      http_ver: version string for the request line.

    Returns the request head, terminated by a blank line.
    """
    # Only omit the port from the Host header when it is the default
    # port for the scheme in use. The previous expression
    # (self.ssl and self.port == 443 or self.port == 80) wrongly
    # omitted the port for ssl connections on port 80 because of
    # operator precedence.
    if (self.ssl and self.port == 443) or (
            not self.ssl and self.port == 80):
        # default port for protocol, so leave it out
        hdrhost = self.host
    else:
        # include nonstandard port in header
        if ':' in self.host:  # must be IPv6
            hdrhost = '[%s]:%d' % (self.host, self.port)
        else:
            hdrhost = '%s:%d' % (self.host, self.port)
    if self._proxy_host and not self.ssl:
        # When talking to a regular http proxy we must send the
        # full URI, but in all other cases we must not (although
        # technically RFC 2616 says servers must accept our
        # request if we screw up, experimentally few do that
        # correctly.)
        assert path[0] == '/', 'path must start with a /'
        path = 'http://%s%s' % (hdrhost, path)
    outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)]
    headers['host'] = ('Host', hdrhost)
    headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
    for hdr, val in headers.itervalues():
        outgoing.append('%s: %s%s' % (hdr, val, EOL))
    outgoing.append(EOL)
    return ''.join(outgoing)
+
def close(self):
    """Close the connection to the server.

    This is a no-op if the connection is already closed. The
    connection may automatically close if requested by the server
    or required by the nature of a response.
    """
    if self.sock is None:
        return
    self.sock.close()
    self.sock = None
    logger.info('closed connection to %s on %s', self.host, self.port)
+
def busy(self):
    """Returns True if this connection object is currently in use.

    If a response is still pending, this will return True, even if
    the request has finished sending. In the future,
    HTTPConnection may transparently juggle multiple connections
    to the server, in which case this will be useful to detect if
    any of those connections is ready for use.
    """
    cr = self._current_response
    if cr is not None:
        if self._current_response_taken:
            if cr.will_close:
                # Caller owns the response; it will close the socket
                # when done, so disown it and report idle.
                self.sock = None
                self._current_response = None
                return False
            elif cr.complete():
                self._current_response = None
                return False
        return True
    return False
+
def request(self, method, path, body=None, headers={},
            expect_continue=False):
    """Send a request to the server.

    For increased flexibility, this does not return the response
    object. Future versions of HTTPConnection that juggle multiple
    sockets will be able to send (for example) 5 requests all at
    once, and then let the requests arrive as data is
    available. Use the `getresponse()` method to retrieve the
    response.

    Args:
      method: HTTP method, e.g. 'GET'.
      path: request path.
      body: Optional request body; either a string or an object
        with a read() method (sent chunked).
      headers: Optional dict of header name -> value. NOTE(review):
        mutable default is safe here because it is only iterated,
        never mutated.
      expect_continue: If True, send an Expect: 100-Continue header.
    """
    if self.busy():
        raise httplib.CannotSendRequest(
            'Can not send another request before '
            'current response is read!')
    self._current_response_taken = False

    logger.info('sending %s request for %s to %s on port %s',
                method, path, self.host, self.port)
    # Re-key the headers by lowercased name, remembering the
    # original capitalization in the value tuple.
    hdrs = dict((k.lower(), (k, v)) for k, v in headers.iteritems())
    if hdrs.get('expect', ('', ''))[1].lower() == '100-continue':
        expect_continue = True
    elif expect_continue:
        hdrs['expect'] = ('Expect', '100-Continue')

    chunked = False
    if body and HDR_CONTENT_LENGTH not in hdrs:
        if getattr(body, '__len__', False):
            hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body))
        elif getattr(body, 'read', False):
            hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
                                       XFER_ENCODING_CHUNKED)
            chunked = True
        else:
            raise BadRequestData('body has no __len__() nor read()')

    self._connect()
    outgoing_headers = self.buildheaders(
        method, path, hdrs, self.http_version)
    response = None
    first = True

    def reconnect(where):
        # Re-establish the connection after the server dropped a
        # keepalive socket under us.
        logger.info('reconnecting during %s', where)
        self.close()
        self._connect()

    # Main send loop: alternately write request data and read any
    # early/interim response until everything is sent or the server
    # has responded completely.
    while ((outgoing_headers or body)
           and not (response and response.complete())):
        select_timeout = self.timeout
        out = outgoing_headers or body
        blocking_on_continue = False
        if expect_continue and not outgoing_headers and not (
            response and (response.headers or response.continued)):
            logger.info(
                'waiting up to %s seconds for'
                ' continue response from server',
                self.continue_timeout)
            select_timeout = self.continue_timeout
            blocking_on_continue = True
            out = False
        if out:
            w = [self.sock]
        else:
            w = []
        r, w, x = select.select([self.sock], w, [], select_timeout)
        # if we were expecting a 100 continue and it's been long
        # enough, just go ahead and assume it's ok. This is the
        # recommended behavior from the RFC.
        if r == w == x == []:
            if blocking_on_continue:
                expect_continue = False
                logger.info('no response to continue expectation from '
                            'server, optimistically sending request body')
            else:
                raise HTTPTimeoutException('timeout sending data')
        was_first = first

        # incoming data
        if r:
            try:
                try:
                    data = r[0].recv(INCOMING_BUFFER_SIZE)
                except socket.sslerror, e:
                    if e.args[0] != socket.SSL_ERROR_WANT_READ:
                        raise
                    logger.debug(
                        'SSL_WANT_READ while sending data, retrying...')
                    continue
                if not data:
                    logger.info('socket appears closed in read')
                    self.sock = None
                    self._current_response = None
                    if response is not None:
                        response._close()
                    # This if/elif ladder is a bit subtle,
                    # comments in each branch should help.
                    if response is not None and response.complete():
                        # Server responded completely and then
                        # closed the socket. We should just shut
                        # things down and let the caller get their
                        # response.
                        logger.info('Got an early response, '
                                    'aborting remaining request.')
                        break
                    elif was_first and response is None:
                        # Most likely a keepalive that got killed
                        # on the server's end. Commonly happens
                        # after getting a really large response
                        # from the server.
                        logger.info(
                            'Connection appeared closed in read on first'
                            ' request loop iteration, will retry.')
                        reconnect('read')
                        continue
                    else:
                        # We didn't just send the first data hunk,
                        # and either have a partial response or no
                        # response at all. There's really nothing
                        # meaningful we can do here.
                        raise HTTPStateError(
                            'Connection appears closed after '
                            'some request data was written, but the '
                            'response was missing or incomplete!')
                logger.debug('read %d bytes in request()', len(data))
                if response is None:
                    response = self.response_class(r[0], self.timeout,
                                                   method)
                response._load_response(data)
                # Jump to the next select() call so we load more
                # data if the server is still sending us content.
                continue
            except socket.error, e:
                if e[0] != errno.EPIPE and not was_first:
                    raise

        # outgoing data
        if w and out:
            try:
                if getattr(out, 'read', False):
                    # File-like body: pull the next chunk and, if
                    # chunked encoding is in use, frame it.
                    data = out.read(OUTGOING_BUFFER_SIZE)
                    if not data:
                        continue
                    if len(data) < OUTGOING_BUFFER_SIZE:
                        if chunked:
                            body = '0' + EOL + EOL
                        else:
                            body = None
                    if chunked:
                        out = hex(len(data))[2:] + EOL + data + EOL
                    else:
                        out = data
                amt = w[0].send(out)
            except socket.error, e:
                if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl:
                    # This means that SSL hasn't flushed its buffer into
                    # the socket yet.
                    # TODO: find a way to block on ssl flushing its buffer
                    # similar to selecting on a raw socket.
                    continue
                elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
                      and not first):
                    raise
                reconnect('write')
                amt = self.sock.send(out)
            logger.debug('sent %d', amt)
            first = False
            # stash data we think we sent in case the socket breaks
            # when we read from it
            # NOTE(review): sent_data is assigned but never read in
            # this method — apparently kept for debugging.
            if was_first:
                sent_data = out[:amt]
            if out is body:
                body = out[amt:]
            else:
                outgoing_headers = out[amt:]

    # close if the server response said to or responded before eating
    # the whole request
    if response is None:
        response = self.response_class(self.sock, self.timeout, method)
    # NOTE(review): `complete` is computed but unused here.
    complete = response.complete()
    data_left = bool(outgoing_headers or body)
    if data_left:
        logger.info('stopped sending request early, '
                    'will close the socket to be safe.')
        response.will_close = True
    if response.will_close:
        # The socket will be closed by the response, so we disown
        # the socket
        self.sock = None
    self._current_response = response
+
def getresponse(self):
    """Return the response to the most recently sent request.

    Blocks until the response headers are fully loaded. Raises
    httplib.ResponseNotReady when no request is in flight, and
    HTTPRemoteClosedError when the server hangs up mid-headers.
    """
    if self._current_response is None:
        raise httplib.ResponseNotReady()
    r = self._current_response
    while r.headers is None:
        # _select() returns False when the peer closed the socket;
        # if the response is not complete at that point, it died
        # mid-headers.
        if not r._select() and not r.complete():
            raise _readers.HTTPRemoteClosedError()
    if r.will_close:
        # The connection is done with this socket once the response
        # is consumed; disown it so a later request reconnects.
        self.sock = None
        self._current_response = None
    elif r.complete():
        self._current_response = None
    else:
        self._current_response_taken = True
    return r
+
+
+class HTTPTimeoutException(httplib.HTTPException):
+ """A timeout occurred while waiting on the server."""
+
+
+class BadRequestData(httplib.HTTPException):
+ """Request body object has neither __len__ nor read."""
+
+
+class HTTPProxyConnectFailedException(httplib.HTTPException):
+ """Connecting to the HTTP proxy failed."""
+
+
+class HTTPStateError(httplib.HTTPException):
+ """Invalid internal state encountered."""
+
+# Forward this exception type from _readers since it needs to be part
+# of the public API.
+HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
+# no-check-code
diff --git a/mercurial/httpclient/_readers.py b/mercurial/httpclient/_readers.py
new file mode 100644
index 0000000..0beb551
--- /dev/null
+++ b/mercurial/httpclient/_readers.py
@@ -0,0 +1,195 @@
+# Copyright 2011, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""Reader objects to abstract out different body response types.
+
+This module is package-private. It is not expected that these will
+have any clients outside of this package.
+"""
+
+import httplib
+import itertools
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class ReadNotReady(Exception):
+ """Raised when read() is attempted but not enough data is loaded."""
+
+
+class HTTPRemoteClosedError(httplib.HTTPException):
+ """The server closed the remote socket in the middle of a response."""
+
+
class AbstractReader(object):
    """Abstract base class for response readers.

    Subclasses must implement _load, and should implement _close if
    it's not an error for the server to close their socket without
    some termination condition being detected during _load.
    """
    def __init__(self):
        # True once the full body has been seen.
        self._finished = False
        # FIFO of byte-string chunks ready to hand out via read().
        self._done_chunks = []

    @property
    def available_data(self):
        # Total number of buffered, readable bytes.
        return sum(map(len, self._done_chunks))

    def done(self):
        """Return True once the entire body has been loaded."""
        return self._finished

    def read(self, amt):
        """Return *amt* buffered bytes (fewer only when finished).

        Raises ReadNotReady when not enough data is buffered and the
        response is still in progress.
        """
        if self.available_data < amt and not self._finished:
            raise ReadNotReady()
        # Take whole chunks while bytes are still needed; `need` is a
        # one-element list so the closure below can mutate it.
        need = [amt]
        def pred(s):
            needed = need[0] > 0
            need[0] -= len(s)
            return needed
        blocks = list(itertools.takewhile(pred, self._done_chunks))
        self._done_chunks = self._done_chunks[len(blocks):]
        over_read = sum(map(len, blocks)) - amt
        if over_read > 0 and blocks:
            # The final chunk overshot the request; split it and push
            # the surplus back onto the front of the buffer.
            logger.debug('need to reinsert %d data into done chunks', over_read)
            last = blocks[-1]
            blocks[-1], reinsert = last[:-over_read], last[-over_read:]
            self._done_chunks.insert(0, reinsert)
        result = ''.join(blocks)
        assert len(result) == amt or (self._finished and len(result) < amt)
        return result

    def _load(self, data): # pragma: no cover
        """Subclasses must implement this.

        As data is available to be read out of this object, it should
        be placed into the _done_chunks list. Subclasses should not
        rely on data remaining in _done_chunks forever, as it may be
        reaped if the client is parsing data as it comes in.
        """
        raise NotImplementedError

    def _close(self):
        """Default implementation of close.

        The default implementation assumes that the reader will mark
        the response as finished on the _finished attribute once the
        entire response body has been read. In the event that this is
        not true, the subclass should override the implementation of
        close (for example, close-is-end responses have to set
        self._finished in the close handler.)
        """
        if not self._finished:
            raise HTTPRemoteClosedError(
                'server appears to have closed the socket mid-response')
+
+
class AbstractSimpleReader(AbstractReader):
    """Abstract base class for simple readers that require no response decoding.

    Examples of such responses are Connection: Close (close-is-end)
    and responses that specify a content length.
    """
    def _load(self, data):
        """Buffer *data* verbatim for later read()s; '' is ignored."""
        if data:
            assert not self._finished, (
                'tried to add data (%r) to a closed reader!' % data)
            # (log message typo fixed: was 'addtional')
            logger.debug('%s read an additional %d data', self.name, len(data))
            self._done_chunks.append(data)
+
+
class CloseIsEndReader(AbstractSimpleReader):
    """Reader for responses that specify Connection: Close for length."""
    name = 'close-is-end'

    def _close(self):
        # For this reader socket closure *is* the termination
        # condition, so mark the body complete instead of erroring.
        logger.info('Marking close-is-end reader as closed.')
        self._finished = True
+
+
class ContentLengthReader(AbstractSimpleReader):
    """Reader for responses that specify an exact content length."""
    name = 'content-length'

    def __init__(self, amount):
        """*amount* is the expected body size in bytes."""
        AbstractReader.__init__(self)
        self._amount = amount
        # A zero-length body is complete before any data arrives.
        if amount == 0:
            self._finished = True
        self._amount_seen = 0

    def _load(self, data):
        AbstractSimpleReader._load(self, data)
        self._amount_seen += len(data)
        # '>' should be impossible for a conforming server; '>='
        # tolerates overshoot rather than hanging.
        if self._amount_seen >= self._amount:
            self._finished = True
            logger.debug('content-length read complete')
+
+
class ChunkedReader(AbstractReader):
    """Reader for chunked transfer encoding responses."""
    def __init__(self, eol):
        """*eol* is the line ending convention the server uses."""
        AbstractReader.__init__(self)
        self._eol = eol
        # How much of _leftover_data was already parsed when the
        # previous _load call gave up waiting for more bytes.
        self._leftover_skip_amt = 0
        # Partial chunk bytes carried over between _load calls.
        self._leftover_data = ''

    def _load(self, data):
        """Decode chunked-encoded *data* into _done_chunks.

        Incomplete trailing chunks are stashed until the next call;
        a zero-length chunk marks the end of the response.
        """
        assert not self._finished, 'tried to add data to a closed reader!'
        # (log message typo fixed: was 'addtional')
        logger.debug('chunked read an additional %d data', len(data))
        position = 0
        if self._leftover_data:
            logger.debug(
                'chunked reader trying to finish block from leftover data')
            # TODO: avoid this string concatenation if possible
            data = self._leftover_data + data
            position = self._leftover_skip_amt
            self._leftover_data = ''
            self._leftover_skip_amt = 0
        datalen = len(data)
        while position < datalen:
            # Each chunk starts with "<hex size><EOL>".
            split = data.find(self._eol, position)
            if split == -1:
                self._leftover_data = data
                self._leftover_skip_amt = position
                return
            amt = int(data[position:split], base=16)
            block_start = split + len(self._eol)
            # If the whole data chunk plus the eol trailer hasn't
            # loaded, we'll wait for the next load.
            if block_start + amt + len(self._eol) > len(data):
                self._leftover_data = data
                self._leftover_skip_amt = position
                return
            if amt == 0:
                self._finished = True
                # (log message typo fixed: was 'redaer')
                logger.debug('closing chunked reader due to chunk of length 0')
                return
            self._done_chunks.append(data[block_start:block_start + amt])
            position = block_start + amt + len(self._eol)
+# no-check-code
diff --git a/mercurial/httpclient/socketutil.py b/mercurial/httpclient/socketutil.py
new file mode 100644
index 0000000..6f06a3c
--- /dev/null
+++ b/mercurial/httpclient/socketutil.py
@@ -0,0 +1,127 @@
+# Copyright 2010, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""Abstraction to simplify socket use for Python < 2.6
+
+This will attempt to use the ssl module and the new
+socket.create_connection method, but fall back to the old
+methods if those are unavailable.
+"""
+import logging
+import socket
+
+logger = logging.getLogger(__name__)
+
# Prefer the real ssl module (Python 2.6+); otherwise fall back to
# httplib/urllib2-era SSL support, recording availability in have_ssl.
try:
    import ssl
    ssl.wrap_socket # make demandimporters load the module
    have_ssl = True
except ImportError:
    import httplib
    import urllib2
    # Pre-2.6: SSL exists only if urllib2 grew an HTTPSHandler.
    have_ssl = getattr(urllib2, 'HTTPSHandler', False)
    ssl = False
+
+
# Use socket.create_connection when available (Python 2.6+);
# otherwise provide an equivalent that tries each getaddrinfo result
# in turn and raises the last socket.error on total failure.
try:
    create_connection = socket.create_connection
except AttributeError:
    def create_connection(address):
        """Connect to (host, port), trying all resolved addresses."""
        host, port = address
        msg = "getaddrinfo returns an empty list"
        sock = None
        for res in socket.getaddrinfo(host, port, 0,
                                      socket.SOCK_STREAM):
            af, socktype, proto, _canonname, sa = res
            try:
                sock = socket.socket(af, socktype, proto)
                logger.info("connect: (%s, %s)", host, port)
                sock.connect(sa)
            # Python 2 idiom: rebinds msg to the error for the final
            # raise below.
            except socket.error, msg:
                logger.info('connect fail: %s %s', host, port)
                if sock:
                    sock.close()
                sock = None
                continue
            break
        if not sock:
            raise socket.error, msg
        return sock
+
# Export a uniform wrap_socket/CERT_* surface: delegate to the ssl
# module when present, otherwise emulate it with httplib.FakeSocket
# on top of the legacy socket.ssl object.
if ssl:
    wrap_socket = ssl.wrap_socket
    CERT_NONE = ssl.CERT_NONE
    CERT_OPTIONAL = ssl.CERT_OPTIONAL
    CERT_REQUIRED = ssl.CERT_REQUIRED
else:
    class FakeSocket(httplib.FakeSocket):
        """Socket wrapper that supports SSL.
        """
        # backport the behavior from Python 2.6, which is to busy wait
        # on the socket instead of anything nice. Sigh.
        # See http://bugs.python.org/issue3890 for more info.
        def recv(self, buflen=1024, flags=0):
            """ssl-aware wrapper around socket.recv
            """
            if flags != 0:
                raise ValueError(
                    "non-zero flags not allowed in calls to recv() on %s" %
                    self.__class__)
            while True:
                try:
                    return self._ssl.read(buflen)
                except socket.sslerror, x:
                    # Not enough decrypted data yet; busy-wait.
                    if x.args[0] == socket.SSL_ERROR_WANT_READ:
                        continue
                    else:
                        raise x

    # Mirrors ssl.PROTOCOL_SSLv23's value on Python 2.6.
    _PROTOCOL_SSLv23 = 2

    # Mirror the ssl module's CERT_* constants.
    CERT_NONE = 0
    CERT_OPTIONAL = 1
    CERT_REQUIRED = 2

    def wrap_socket(sock, keyfile=None, certfile=None,
                    server_side=False, cert_reqs=CERT_NONE,
                    ssl_version=_PROTOCOL_SSLv23, ca_certs=None,
                    do_handshake_on_connect=True,
                    suppress_ragged_eofs=True):
        """Signature-compatible stand-in for ssl.wrap_socket.

        Certificate validation is not supported without the real
        ssl module and raises CertificateValidationUnsupported.
        """
        if cert_reqs != CERT_NONE and ca_certs:
            raise CertificateValidationUnsupported(
                'SSL certificate validation requires the ssl module'
                '(included in Python 2.6 and later.)')
        sslob = socket.ssl(sock)
        # borrow httplib's workaround for no ssl.wrap_socket
        sock = FakeSocket(sock, sslob)
        return sock
+
+
+class CertificateValidationUnsupported(Exception):
+ """Exception raised when cert validation is requested but unavailable."""
+# no-check-code