author      Dana Powers <dana.powers@rd.io>   2014-09-07 18:52:05 -0700
committer   Dana Powers <dana.powers@rd.io>   2014-09-07 19:09:32 -0700
commit      715425c639a476139065689afde3d255a07d6f96 (patch)
tree        0ef2cd875c97c8ca867d89328d6fd5fec7dfcbe8 /kafka
parent      a99384f4c601d127ab1c4fe5b272ea5c07fd695d (diff)
parent      be23042ecd9ab330886745ccc9ec9e3a0039836f (diff)
download    kafka-python-715425c639a476139065689afde3d255a07d6f96.tar.gz
Merge pull request #227 from wizzat-feature/py3
Python 3 Support

Conflicts:
    kafka/producer.py
    test/test_client.py
    test/test_client_integration.py
    test/test_codec.py
    test/test_consumer.py
    test/test_consumer_integration.py
    test/test_failover_integration.py
    test/test_producer.py
    test/test_producer_integration.py
    test/test_protocol.py
    test/test_util.py
Diffstat (limited to 'kafka')
-rw-r--r--   kafka/client.py        17
-rw-r--r--   kafka/codec.py         19
-rw-r--r--   kafka/conn.py          14
-rw-r--r--   kafka/consumer.py      17
-rw-r--r--   kafka/partitioner.py    2
-rw-r--r--   kafka/producer.py      16
-rw-r--r--   kafka/protocol.py      17
-rw-r--r--   kafka/util.py          17
8 files changed, 77 insertions, 42 deletions
diff --git a/kafka/client.py b/kafka/client.py
index 8630f66..a918091 100644
--- a/kafka/client.py
+++ b/kafka/client.py
@@ -1,3 +1,4 @@
+import binascii
import collections
import copy
import functools
@@ -20,7 +21,7 @@ log = logging.getLogger("kafka")
class KafkaClient(object):
- CLIENT_ID = "kafka-python"
+ CLIENT_ID = b"kafka-python"
ID_GEN = itertools.count()
# NOTE: The timeout given to the client should always be greater than the
@@ -81,7 +82,7 @@ class KafkaClient(object):
"""
Generate a new correlation id
"""
- return KafkaClient.ID_GEN.next()
+ return next(KafkaClient.ID_GEN)
def _send_broker_unaware_request(self, requestId, request):
"""
@@ -96,7 +97,7 @@ class KafkaClient(object):
return response
except Exception as e:
log.warning("Could not send request [%r] to server %s:%i, "
- "trying next server: %s" % (request, host, port, e))
+ "trying next server: %s" % (binascii.b2a_hex(request), host, port, e))
raise KafkaUnavailableError("All servers failed to process request")
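Note: interpolating a raw bytes request into a log message produces unreadable output (escape-laden repr noise on Python 3); hex-encoding it first keeps the log line printable. A small illustration with a made-up payload:

    import binascii

    request = b'\x00\x03\x00\x00\x00\x00\x00\x01'  # hypothetical wire bytes
    # b2a_hex returns printable hex on both Python 2 and 3
    print(binascii.b2a_hex(request))  # b'0003000000000001'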
@@ -145,7 +146,7 @@ class KafkaClient(object):
# For each broker, send the list of request payloads
for broker, payloads in payloads_by_broker.items():
- conn = self._get_conn(broker.host, broker.port)
+ conn = self._get_conn(broker.host.decode('utf-8'), broker.port)
requestId = self._next_id()
request = encoder_fn(client_id=self.client_id,
correlation_id=requestId, payloads=payloads)
@@ -160,11 +161,11 @@ class KafkaClient(object):
response = conn.recv(requestId)
except ConnectionError as e:
log.warning("Could not receive response to request [%s] "
- "from server %s: %s", request, conn, e)
+ "from server %s: %s", binascii.b2a_hex(request), conn, e)
failed = True
except ConnectionError as e:
log.warning("Could not send request [%s] to server %s: %s",
- request, conn, e)
+ binascii.b2a_hex(request), conn, e)
failed = True
if failed:
@@ -233,8 +234,8 @@ class KafkaClient(object):
A reinit() has to be done on the copy before it can be used again
"""
c = copy.deepcopy(self)
- for k, v in c.conns.items():
- c.conns[k] = v.copy()
+ for key in c.conns:
+ c.conns[key] = self.conns[key].copy()
return c
def reinit(self):
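Note: the loop now iterates over keys and takes each connection from the original client rather than from its deep copy; on Python 3, dict.items() is a live view rather than a list, so rebinding values while iterating keys is the more clearly portable form. A sketch of the pattern, with placeholder connection objects:

    import copy

    conns = {('localhost', 9092): object()}  # placeholder connections
    c = copy.deepcopy(conns)
    for key in c:
        c[key] = conns[key]  # rebind each value from the source mapping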
diff --git a/kafka/codec.py b/kafka/codec.py
index 206ddb4..2279200 100644
--- a/kafka/codec.py
+++ b/kafka/codec.py
@@ -1,8 +1,11 @@
-from cStringIO import StringIO
+from io import BytesIO
import gzip
import struct
-_XERIAL_V1_HEADER = (-126, 'S', 'N', 'A', 'P', 'P', 'Y', 0, 1, 1)
+import six
+from six.moves import xrange
+
+_XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1)
_XERIAL_V1_FORMAT = 'bccccccBii'
try:
@@ -21,7 +24,7 @@ def has_snappy():
def gzip_encode(payload):
- buffer = StringIO()
+ buffer = BytesIO()
handle = gzip.GzipFile(fileobj=buffer, mode="w")
handle.write(payload)
handle.close()
@@ -32,7 +35,7 @@ def gzip_encode(payload):
def gzip_decode(payload):
- buffer = StringIO(payload)
+ buffer = BytesIO(payload)
handle = gzip.GzipFile(fileobj=buffer, mode='r')
result = handle.read()
handle.close()
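Note: cStringIO is gone in Python 3, and io.StringIO only accepts text; io.BytesIO (available on 2.6+ and 3) is the right in-memory buffer for binary payloads like gzip data. A quick round trip in the same style as the code above:

    import gzip
    from io import BytesIO

    buf = BytesIO()
    handle = gzip.GzipFile(fileobj=buf, mode='w')
    handle.write(b'hello kafka')  # payload must be bytes
    handle.close()

    handle = gzip.GzipFile(fileobj=BytesIO(buf.getvalue()), mode='r')
    assert handle.read() == b'hello kafka'
    handle.close()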
@@ -68,9 +71,9 @@ def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32 * 1024):
for i in xrange(0, len(payload), xerial_blocksize):
yield payload[i:i+xerial_blocksize]
- out = StringIO()
+ out = BytesIO()
- header = ''.join([struct.pack('!' + fmt, dat) for fmt, dat
+ header = b''.join([struct.pack('!' + fmt, dat) for fmt, dat
in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)])
out.write(header)
@@ -121,8 +124,8 @@ def snappy_decode(payload):
if _detect_xerial_stream(payload):
# TODO ? Should become a fileobj ?
- out = StringIO()
- byt = buffer(payload[16:])
+ out = BytesIO()
+ byt = payload[16:]
length = len(byt)
cursor = 0
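Note: the buffer() builtin does not exist in Python 3. Slicing the payload yields an independent bytes object with the length and indexing behavior the decoder needs; memoryview would be the zero-copy analogue on both versions. For example:

    payload = b'\x00' * 16 + b'compressed-block'
    byt = payload[16:]               # plain slice, portable
    view = memoryview(payload)[16:]  # zero-copy alternative
    assert len(byt) == len(view) == 16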
diff --git a/kafka/conn.py b/kafka/conn.py
index a577eba..ddfee8b 100644
--- a/kafka/conn.py
+++ b/kafka/conn.py
@@ -5,6 +5,8 @@ import socket
import struct
from threading import local
+import six
+
from kafka.common import ConnectionError
log = logging.getLogger("kafka")
@@ -19,7 +21,7 @@ def collect_hosts(hosts, randomize=True):
randomize the returned list.
"""
- if isinstance(hosts, basestring):
+ if isinstance(hosts, six.string_types):
hosts = hosts.strip().split(',')
result = []
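Note: basestring is Python-2-only; six.string_types is (str, unicode) on 2 and (str,) on 3, so the check still accepts either a comma-separated string or an iterable of hosts. A sketch of the dual input handling:

    import six

    def collect_hosts(hosts):
        # accept 'host1:9092,host2:9092' or ['host1:9092', 'host2:9092']
        if isinstance(hosts, six.string_types):
            hosts = hosts.strip().split(',')
        return [h.strip() for h in hosts]

    assert collect_hosts('a:9092, b:9092') == collect_hosts(['a:9092', 'b:9092'])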
@@ -92,7 +94,7 @@ class KafkaConnection(local):
# Receiving empty string from recv signals
# that the socket is in error. we will never get
# more data from this socket
- if data == '':
+ if data == b'':
raise socket.error("Not enough data to read message -- did server kill socket?")
except socket.error:
@@ -103,7 +105,7 @@ class KafkaConnection(local):
log.debug("Read %d/%d bytes from Kafka", num_bytes - bytes_left, num_bytes)
responses.append(data)
- return ''.join(responses)
+ return b''.join(responses)
##################
# Public API #
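Note: socket.recv() returns bytes on Python 3, so both the dead-socket sentinel and the final join must use bytes literals; comparing against '' never matches, and ''.join() over bytes chunks raises TypeError. The accumulation pattern in isolation:

    # chunks as returned by successive sock.recv() calls;
    # an empty read (data == b'') would signal a dead socket
    chunks = [b'\x00\x01', b'\x02\x03']
    response = b''.join(chunks)  # ''.join(chunks) fails on Python 3
    assert response == b'\x00\x01\x02\x03'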
@@ -144,7 +146,7 @@ class KafkaConnection(local):
# Read the remainder of the response
resp = self._read_bytes(size)
- return str(resp)
+ return resp
def copy(self):
"""
@@ -153,6 +155,10 @@ class KafkaConnection(local):
return a new KafkaConnection object
"""
c = copy.deepcopy(self)
+ # Python 3 doesn't copy custom attributes of the threadlocal subclass
+ c.host = copy.copy(self.host)
+ c.port = copy.copy(self.port)
+ c.timeout = copy.copy(self.timeout)
c._sock = None
return c
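Note: per the comment in the hunk above, copy.deepcopy of a threading.local subclass can come back on Python 3 without the attributes assigned in __init__, since they live in per-thread storage; the copy re-assigns them explicitly. A toy reproduction of the workaround:

    import copy
    from threading import local

    class Conn(local):
        def __init__(self, host, port):
            self.host = host
            self.port = port

    original = Conn('localhost', 9092)
    clone = copy.deepcopy(original)
    # re-assign explicitly in case deepcopy dropped the thread-local state
    clone.host = copy.copy(original.host)
    clone.port = copy.copy(original.port)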
diff --git a/kafka/consumer.py b/kafka/consumer.py
index 928bbac..fa1b8bc 100644
--- a/kafka/consumer.py
+++ b/kafka/consumer.py
@@ -1,12 +1,21 @@
from __future__ import absolute_import
-from itertools import izip_longest, repeat
+try:
+ from itertools import zip_longest as izip_longest, repeat # pylint: disable-msg=E0611
+except ImportError: # python 2
+ from itertools import izip_longest as izip_longest, repeat
import logging
import time
import numbers
from threading import Lock
from multiprocessing import Process, Queue as MPQueue, Event, Value
-from Queue import Empty, Queue
+
+import six
+
+try:
+ from Queue import Empty, Queue
+except ImportError: # python 3
+ from queue import Empty, Queue
import kafka.common
from kafka.common import (
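Note: renamed stdlib modules are bridged with try/except ImportError: attempt one spelling and fall back to the other. The same pattern covers both renames touched here:

    try:
        from queue import Empty, Queue      # Python 3
    except ImportError:
        from Queue import Empty, Queue      # Python 2

    try:
        from itertools import zip_longest   # Python 3
    except ImportError:
        from itertools import izip_longest as zip_longest  # Python 2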
@@ -420,7 +429,7 @@ class SimpleConsumer(Consumer):
for p in self.fetch_offsets.keys())
while partitions:
requests = []
- for partition, buffer_size in partitions.iteritems():
+ for partition, buffer_size in six.iteritems(partitions):
requests.append(FetchRequest(self.topic, partition,
self.fetch_offsets[partition],
buffer_size))
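Note: dict.iteritems() was removed in Python 3; six.iteritems(d) dispatches to iteritems() on 2 and items() on 3, giving lazy iteration on both. For example:

    import six

    buffer_sizes = {0: 4096, 1: 8192}  # hypothetical partition -> buffer size
    for partition, buffer_size in six.iteritems(buffer_sizes):
        print(partition, buffer_size)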
@@ -582,7 +591,7 @@ class MultiProcessConsumer(Consumer):
for chunk in chunks:
chunk = filter(lambda x: x is not None, chunk)
args = (client.copy(),
- group, topic, chunk,
+ group, topic, list(chunk),
self.queue, self.start, self.exit,
self.pause, self.size)
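Note: filter() returns a lazy, single-use iterator on Python 3 (a list on Python 2), and the result here is passed to a multiprocessing.Process; wrapping it in list() makes the argument reusable and picklable on both versions. The difference in isolation:

    chunk = filter(lambda x: x is not None, [0, None, 1])
    chunk = list(chunk)  # materialize: safe to re-iterate and to pickle
    assert chunk == [0, 1]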
diff --git a/kafka/partitioner.py b/kafka/partitioner.py
index 5287cef..695dd6f 100644
--- a/kafka/partitioner.py
+++ b/kafka/partitioner.py
@@ -43,7 +43,7 @@ class RoundRobinPartitioner(Partitioner):
if self.partitions != partitions:
self._set_partitions(partitions)
- return self.iterpart.next()
+ return next(self.iterpart)
class HashedPartitioner(Partitioner):
diff --git a/kafka/producer.py b/kafka/producer.py
index b28a424..4a04b38 100644
--- a/kafka/producer.py
+++ b/kafka/producer.py
@@ -4,11 +4,17 @@ import logging
import time
import random
-from Queue import Empty
+try:
+ from queue import Empty
+except ImportError:
+ from Queue import Empty
from collections import defaultdict
from itertools import cycle
from multiprocessing import Queue, Process
+import six
+from six.moves import xrange
+
from kafka.common import (
ProduceRequest, TopicAndPartition, UnsupportedCodecError, UnknownTopicOrPartitionError
)
@@ -173,7 +179,7 @@ class Producer(object):
raise TypeError("msg is not a list or tuple!")
# Raise TypeError if any message is not encoded as bytes
- if any(not isinstance(m, bytes) for m in msg):
+ if any(not isinstance(m, six.binary_type) for m in msg):
raise TypeError("all produce message payloads must be type bytes")
if self.async:
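Note: six.binary_type is str on Python 2 and bytes on Python 3, so the guard rejects text payloads on both versions before they reach the wire. Roughly:

    import six

    def check_payloads(msgs):
        if any(not isinstance(m, six.binary_type) for m in msgs):
            raise TypeError('all produce message payloads must be type bytes')

    check_payloads([b'ok'])      # passes on 2 and 3
    # check_payloads([u'nope'])  # raises TypeError on both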
@@ -221,7 +227,7 @@ class SimpleProducer(Producer):
batch_send_every_t - If set, messages are send after this timeout
random_start - If true, randomize the initial partition which the
the first message block will be published to, otherwise
- if false, the first message block will always publish
+ if false, the first message block will always publish
to partition 0 before cycling through each partition
"""
def __init__(self, client, async=False,
@@ -252,9 +258,9 @@ class SimpleProducer(Producer):
if self.random_start:
num_partitions = len(self.client.topic_partitions[topic])
for _ in xrange(random.randint(0, num_partitions-1)):
- self.partition_cycles[topic].next()
+ next(self.partition_cycles[topic])
- return self.partition_cycles[topic].next()
+ return next(self.partition_cycles[topic])
def send_messages(self, topic, *msg):
partition = self._next_partition(topic)
diff --git a/kafka/protocol.py b/kafka/protocol.py
index 58661c7..e5356c5 100644
--- a/kafka/protocol.py
+++ b/kafka/protocol.py
@@ -1,6 +1,9 @@
import logging
import struct
-import zlib
+
+import six
+
+from six.moves import xrange
from kafka.codec import (
gzip_encode, gzip_decode, snappy_encode, snappy_decode
@@ -13,7 +16,7 @@ from kafka.common import (
UnsupportedCodecError
)
from kafka.util import (
- read_short_string, read_int_string, relative_unpack,
+ crc32, read_short_string, read_int_string, relative_unpack,
write_short_string, write_int_string, group_by_topic_and_partition
)
@@ -67,7 +70,7 @@ class KafkaProtocol(object):
Offset => int64
MessageSize => int32
"""
- message_set = ""
+ message_set = b""
for message in messages:
encoded_message = KafkaProtocol._encode_message(message)
message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
@@ -94,8 +97,8 @@ class KafkaProtocol(object):
msg = struct.pack('>BB', message.magic, message.attributes)
msg += write_int_string(message.key)
msg += write_int_string(message.value)
- crc = zlib.crc32(msg)
- msg = struct.pack('>i%ds' % len(msg), crc, msg)
+ crc = crc32(msg)
+ msg = struct.pack('>I%ds' % len(msg), crc, msg)
else:
raise ProtocolError("Unexpected magic number: %d" % message.magic)
return msg
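Note: zlib.crc32 returns a signed value on Python 2 and an unsigned one on Python 3, so packing it with the signed format '>i' can fail once the value is normalized; the crc32 helper masks to 32 bits and the pack format switches to unsigned '>I'. Illustration:

    import binascii
    import struct

    def crc32(data):
        # normalize to an unsigned 32-bit value on both versions
        return binascii.crc32(data) & 0xffffffff

    msg = b'payload'
    crc = crc32(msg)
    packed = struct.pack('>I%ds' % len(msg), crc, msg)  # '>i' may raise struct.error
    assert struct.unpack('>I', packed[:4])[0] == crc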
@@ -145,8 +148,8 @@ class KafkaProtocol(object):
The offset is actually read from decode_message_set_iter (it is part
of the MessageSet payload).
"""
- ((crc, magic, att), cur) = relative_unpack('>iBB', data, 0)
- if crc != zlib.crc32(data[4:]):
+ ((crc, magic, att), cur) = relative_unpack('>IBB', data, 0)
+ if crc != crc32(data[4:]):
raise ChecksumError("Message checksum failed")
(key, cur) = read_int_string(data, cur)
diff --git a/kafka/util.py b/kafka/util.py
index 9121374..1e03cf1 100644
--- a/kafka/util.py
+++ b/kafka/util.py
@@ -1,14 +1,21 @@
+import binascii
import collections
import struct
import sys
from threading import Thread, Event
+import six
+
from kafka.common import BufferUnderflowError
+def crc32(data):
+ return binascii.crc32(data) & 0xffffffff
+
+
def write_int_string(s):
- if s is not None and not isinstance(s, str):
- raise TypeError('Expected "%s" to be str\n'
+ if s is not None and not isinstance(s, six.binary_type):
+ raise TypeError('Expected "%s" to be bytes\n'
'data=%s' % (type(s), repr(s)))
if s is None:
return struct.pack('>i', -1)
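Note: write_int_string frames a nullable bytes payload with a signed 32-bit big-endian length, -1 marking null, matching the Kafka wire format for byte arrays. A sketch of the encode plus its matching decode:

    import struct

    def write_int_string(s):
        if s is None:
            return struct.pack('>i', -1)                 # null marker
        return struct.pack('>i%ds' % len(s), len(s), s)  # length-prefixed bytes

    framed = write_int_string(b'value')
    (length,) = struct.unpack('>i', framed[:4])
    assert framed[4:4 + length] == b'value'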
@@ -17,12 +24,12 @@ def write_int_string(s):
def write_short_string(s):
- if s is not None and not isinstance(s, str):
- raise TypeError('Expected "%s" to be str\n'
+ if s is not None and not isinstance(s, six.binary_type):
+ raise TypeError('Expected "%s" to be bytes\n'
'data=%s' % (type(s), repr(s)))
if s is None:
return struct.pack('>h', -1)
- elif len(s) > 32767 and sys.version < (2, 7):
+ elif len(s) > 32767 and sys.version_info < (2, 7):
# Python 2.6 issues a deprecation warning instead of a struct error
raise struct.error(len(s))
else:
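Note: sys.version is a string, so sys.version < (2, 7) compares str to tuple: Python 2 silently orders mixed types by type name (making the test always True), and Python 3 raises TypeError; sys.version_info is the tuple intended for such comparisons:

    import sys

    # sys.version is e.g. '2.6.9 (default, ...)'; sys.version_info is a tuple
    needs_py26_workaround = sys.version_info < (2, 7)
    print(needs_py26_workaround)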