author    | Dana Powers <dana.powers@rd.io> | 2014-09-07 18:52:05 -0700
committer | Dana Powers <dana.powers@rd.io> | 2014-09-07 19:09:32 -0700
commit    | 715425c639a476139065689afde3d255a07d6f96 (patch)
tree      | 0ef2cd875c97c8ca867d89328d6fd5fec7dfcbe8 /kafka
parent    | a99384f4c601d127ab1c4fe5b272ea5c07fd695d (diff)
parent    | be23042ecd9ab330886745ccc9ec9e3a0039836f (diff)
download  | kafka-python-715425c639a476139065689afde3d255a07d6f96.tar.gz
Merge pull request #227 from wizzat-feature/py3
Python 3 Support
Conflicts:
kafka/producer.py
test/test_client.py
test/test_client_integration.py
test/test_codec.py
test/test_consumer.py
test/test_consumer_integration.py
test/test_failover_integration.py
test/test_producer.py
test/test_producer_integration.py
test/test_protocol.py
test/test_util.py
Diffstat (limited to 'kafka')
-rw-r--r-- | kafka/client.py      | 17
-rw-r--r-- | kafka/codec.py       | 19
-rw-r--r-- | kafka/conn.py        | 14
-rw-r--r-- | kafka/consumer.py    | 17
-rw-r--r-- | kafka/partitioner.py |  2
-rw-r--r-- | kafka/producer.py    | 16
-rw-r--r-- | kafka/protocol.py    | 17
-rw-r--r-- | kafka/util.py        | 17
8 files changed, 77 insertions, 42 deletions
diff --git a/kafka/client.py b/kafka/client.py
index 8630f66..a918091 100644
--- a/kafka/client.py
+++ b/kafka/client.py
@@ -1,3 +1,4 @@
+import binascii
 import collections
 import copy
 import functools
@@ -20,7 +21,7 @@ log = logging.getLogger("kafka")
 
 class KafkaClient(object):
 
-    CLIENT_ID = "kafka-python"
+    CLIENT_ID = b"kafka-python"
     ID_GEN = itertools.count()
 
     # NOTE: The timeout given to the client should always be greater than the
@@ -81,7 +82,7 @@ class KafkaClient(object):
        """
        Generate a new correlation id
        """
-        return KafkaClient.ID_GEN.next()
+        return next(KafkaClient.ID_GEN)
 
    def _send_broker_unaware_request(self, requestId, request):
        """
@@ -96,7 +97,7 @@ class KafkaClient(object):
                return response
            except Exception as e:
                log.warning("Could not send request [%r] to server %s:%i, "
-                           "trying next server: %s" % (request, host, port, e))
+                           "trying next server: %s" % (binascii.b2a_hex(request), host, port, e))
 
        raise KafkaUnavailableError("All servers failed to process request")
 
@@ -145,7 +146,7 @@ class KafkaClient(object):
 
        # For each broker, send the list of request payloads
        for broker, payloads in payloads_by_broker.items():
-            conn = self._get_conn(broker.host, broker.port)
+            conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
            requestId = self._next_id()
            request = encoder_fn(client_id=self.client_id,
                                 correlation_id=requestId, payloads=payloads)
@@ -160,11 +161,11 @@ class KafkaClient(object):
                    response = conn.recv(requestId)
                except ConnectionError as e:
                    log.warning("Could not receive response to request [%s] "
-                               "from server %s: %s", request, conn, e)
+                               "from server %s: %s", binascii.b2a_hex(request), conn, e)
                    failed = True
            except ConnectionError as e:
                log.warning("Could not send request [%s] to server %s: %s",
-                           request, conn, e)
+                           binascii.b2a_hex(request), conn, e)
                failed = True
 
            if failed:
@@ -233,8 +234,8 @@ class KafkaClient(object):
        A reinit() has to be done on the copy before it can be used again
        """
        c = copy.deepcopy(self)
-        for k, v in c.conns.items():
-            c.conns[k] = v.copy()
+        for key in c.conns:
+            c.conns[key] = self.conns[key].copy()
        return c
 
    def reinit(self):
diff --git a/kafka/codec.py b/kafka/codec.py
index 206ddb4..2279200 100644
--- a/kafka/codec.py
+++ b/kafka/codec.py
@@ -1,8 +1,11 @@
-from cStringIO import StringIO
+from io import BytesIO
 import gzip
 import struct
 
-_XERIAL_V1_HEADER = (-126, 'S', 'N', 'A', 'P', 'P', 'Y', 0, 1, 1)
+import six
+from six.moves import xrange
+
+_XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1)
 _XERIAL_V1_FORMAT = 'bccccccBii'
 
 try:
@@ -21,7 +24,7 @@ def has_snappy():
 
 
 def gzip_encode(payload):
-    buffer = StringIO()
+    buffer = BytesIO()
    handle = gzip.GzipFile(fileobj=buffer, mode="w")
    handle.write(payload)
    handle.close()
@@ -32,7 +35,7 @@
 
 
 def gzip_decode(payload):
-    buffer = StringIO(payload)
+    buffer = BytesIO(payload)
    handle = gzip.GzipFile(fileobj=buffer, mode='r')
    result = handle.read()
    handle.close()
@@ -68,9 +71,9 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
            for i in xrange(0, len(payload), xerial_blocksize):
                yield payload[i:i+xerial_blocksize]
 
-        out = StringIO()
+        out = BytesIO()
 
-        header = ''.join([struct.pack('!' + fmt, dat) for fmt, dat
+        header = b''.join([struct.pack('!' + fmt, dat) for fmt, dat
                          in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)])
 
        out.write(header)
@@ -121,8 +124,8 @@ def snappy_decode(payload):
    if _detect_xerial_stream(payload):
        # TODO ? Should become a fileobj ?
-        out = StringIO()
-        byt = buffer(payload[16:])
+        out = BytesIO()
+        byt = payload[16:]
        length = len(byt)
        cursor = 0
diff --git a/kafka/conn.py b/kafka/conn.py
index a577eba..ddfee8b 100644
--- a/kafka/conn.py
+++ b/kafka/conn.py
@@ -5,6 +5,8 @@ import socket
 import struct
 from threading import local
 
+import six
+
 from kafka.common import ConnectionError
 
 log = logging.getLogger("kafka")
@@ -19,7 +21,7 @@ def collect_hosts(hosts, randomize=True):
    randomize the returned list.
    """
 
-    if isinstance(hosts, basestring):
+    if isinstance(hosts, six.string_types):
        hosts = hosts.strip().split(',')
 
    result = []
@@ -92,7 +94,7 @@ class KafkaConnection(local):
            # Receiving empty string from recv signals
            # that the socket is in error. we will never get
            # more data from this socket
-            if data == '':
+            if data == b'':
                raise socket.error("Not enough data to read message -- did server kill socket?")
 
        except socket.error:
@@ -103,7 +105,7 @@ class KafkaConnection(local):
            log.debug("Read %d/%d bytes from Kafka", num_bytes - bytes_left, num_bytes)
            responses.append(data)
 
-        return ''.join(responses)
+        return b''.join(responses)
 
    ##################
    #   Public API   #
@@ -144,7 +146,7 @@ class KafkaConnection(local):
        # Read the remainder of the response
        resp = self._read_bytes(size)
 
-        return str(resp)
+        return resp
 
    def copy(self):
        """
@@ -153,6 +155,10 @@ class KafkaConnection(local):
        return a new KafkaConnection object
        """
        c = copy.deepcopy(self)
+        # Python 3 doesn't copy custom attributes of the threadlocal subclass
+        c.host = copy.copy(self.host)
+        c.port = copy.copy(self.port)
+        c.timeout = copy.copy(self.timeout)
        c._sock = None
        return c
 
diff --git a/kafka/consumer.py b/kafka/consumer.py
index 928bbac..fa1b8bc 100644
--- a/kafka/consumer.py
+++ b/kafka/consumer.py
@@ -1,12 +1,21 @@
 from __future__ import absolute_import
 
-from itertools import izip_longest, repeat
+try:
+    from itertools import zip_longest as izip_longest, repeat  # pylint: disable-msg=E0611
+except ImportError:  # python 2
+    from itertools import izip_longest as izip_longest, repeat
 import logging
 import time
 import numbers
 from threading import Lock
 from multiprocessing import Process, Queue as MPQueue, Event, Value
-from Queue import Empty, Queue
+
+import six
+
+try:
+    from Queue import Empty, Queue
+except ImportError:  # python 2
+    from queue import Empty, Queue
 
 import kafka.common
 from kafka.common import (
@@ -420,7 +429,7 @@ class SimpleConsumer(Consumer):
                          for p in self.fetch_offsets.keys())
        while partitions:
            requests = []
-            for partition, buffer_size in partitions.iteritems():
+            for partition, buffer_size in six.iteritems(partitions):
                requests.append(FetchRequest(self.topic, partition,
                                             self.fetch_offsets[partition],
                                             buffer_size))
@@ -582,7 +591,7 @@ class MultiProcessConsumer(Consumer):
        for chunk in chunks:
            chunk = filter(lambda x: x is not None, chunk)
            args = (client.copy(),
-                    group, topic, chunk,
+                    group, topic, list(chunk),
                    self.queue, self.start, self.exit,
                    self.pause, self.size)
diff --git a/kafka/partitioner.py b/kafka/partitioner.py
index 5287cef..695dd6f 100644
--- a/kafka/partitioner.py
+++ b/kafka/partitioner.py
@@ -43,7 +43,7 @@ class RoundRobinPartitioner(Partitioner):
        if self.partitions != partitions:
            self._set_partitions(partitions)
 
-        return self.iterpart.next()
+        return next(self.iterpart)
 
 
 class HashedPartitioner(Partitioner):
diff --git a/kafka/producer.py b/kafka/producer.py
index b28a424..4a04b38 100644
--- a/kafka/producer.py
+++ b/kafka/producer.py
@@ -4,11 +4,17 @@ import logging
 import time
 import random
 
-from Queue import Empty
+try:
+    from queue import Empty
+except ImportError:
+    from Queue import Empty
 from collections import defaultdict
 from itertools import cycle
 from multiprocessing import Queue, Process
 
+import six
+from six.moves import xrange
+
 from kafka.common import (
     ProduceRequest, TopicAndPartition, UnsupportedCodecError, UnknownTopicOrPartitionError
 )
@@ -173,7 +179,7 @@ class Producer(object):
            raise TypeError("msg is not a list or tuple!")
 
        # Raise TypeError if any message is not encoded as bytes
-        if any(not isinstance(m, bytes) for m in msg):
+        if any(not isinstance(m, six.binary_type) for m in msg):
            raise TypeError("all produce message payloads must be type bytes")
 
        if self.async:
@@ -221,7 +227,7 @@ class SimpleProducer(Producer):
    batch_send_every_t - If set, messages are send after this timeout
    random_start - If true, randomize the initial partition which the
                   the first message block will be published to, otherwise
-                   if false, the first message block will always publish
+                   if false, the first message block will always publish
                   to partition 0 before cycling through each partition
    """
    def __init__(self, client, async=False,
@@ -252,9 +258,9 @@ class SimpleProducer(Producer):
        if self.random_start:
            num_partitions = len(self.client.topic_partitions[topic])
            for _ in xrange(random.randint(0, num_partitions-1)):
-                self.partition_cycles[topic].next()
+                next(self.partition_cycles[topic])
 
-        return self.partition_cycles[topic].next()
+        return next(self.partition_cycles[topic])
 
    def send_messages(self, topic, *msg):
        partition = self._next_partition(topic)
diff --git a/kafka/protocol.py b/kafka/protocol.py
index 58661c7..e5356c5 100644
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -1,6 +1,9 @@
 import logging
 import struct
-import zlib
+
+import six
+
+from six.moves import xrange
 
 from kafka.codec import (
     gzip_encode, gzip_decode, snappy_encode, snappy_decode
@@ -13,7 +16,7 @@ from kafka.common import (
     UnsupportedCodecError
 )
 from kafka.util import (
-    read_short_string, read_int_string, relative_unpack,
+    crc32, read_short_string, read_int_string, relative_unpack,
     write_short_string, write_int_string, group_by_topic_and_partition
 )
 
@@ -67,7 +70,7 @@ class KafkaProtocol(object):
        Offset => int64
        MessageSize => int32
        """
-        message_set = ""
+        message_set = b""
        for message in messages:
            encoded_message = KafkaProtocol._encode_message(message)
            message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
@@ -94,8 +97,8 @@ class KafkaProtocol(object):
            msg = struct.pack('>BB', message.magic, message.attributes)
            msg += write_int_string(message.key)
            msg += write_int_string(message.value)
-            crc = zlib.crc32(msg)
-            msg = struct.pack('>i%ds' % len(msg), crc, msg)
+            crc = crc32(msg)
+            msg = struct.pack('>I%ds' % len(msg), crc, msg)
        else:
            raise ProtocolError("Unexpected magic number: %d" % message.magic)
        return msg
@@ -145,8 +148,8 @@ class KafkaProtocol(object):
        The offset is actually read from decode_message_set_iter (it is part
        of the MessageSet payload).
        """
-        ((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
-        if crc != zlib.crc32(data[4:]):
+        ((crc, magic, att), cur) = relative_unpack('>IBB', data, 0)
+        if crc != crc32(data[4:]):
            raise ChecksumError("Message checksum failed")
 
        (key, cur) = read_int_string(data, cur)
diff --git a/kafka/util.py b/kafka/util.py
index 9121374..1e03cf1 100644
--- a/kafka/util.py
+++ b/kafka/util.py
@@ -1,14 +1,21 @@
+import binascii
 import collections
 import struct
 import sys
 from threading import Thread, Event
 
+import six
+
 from kafka.common import BufferUnderflowError
 
 
+def crc32(data):
+    return binascii.crc32(data) & 0xffffffff
+
+
 def write_int_string(s):
-    if s is not None and not isinstance(s, str):
-        raise TypeError('Expected "%s" to be str\n'
+    if s is not None and not isinstance(s, six.binary_type):
+        raise TypeError('Expected "%s" to be bytes\n'
                        'data=%s' % (type(s), repr(s)))
    if s is None:
        return struct.pack('>i', -1)
@@ -17,12 +24,12 @@
 
 
 def write_short_string(s):
-    if s is not None and not isinstance(s, str):
-        raise TypeError('Expected "%s" to be str\n'
+    if s is not None and not isinstance(s, six.binary_type):
+        raise TypeError('Expected "%s" to be bytes\n'
                        'data=%s' % (type(s), repr(s)))
    if s is None:
        return struct.pack('>h', -1)
-    elif len(s) > 32767 and sys.version < (2, 7):
+    elif len(s) > 32767 and sys.version_info < (2, 7):
        # Python 2.6 issues a deprecation warning instead of a struct error
        raise struct.error(len(s))
    else: