Skip to content

Commit

Permalink
Merge pull request dpkp#233 from dpkp/str_join_speedup
Browse files (browse the repository at this point in the history)
Improve string concatenation performance on pypy and python 3
  • Loading branch information
wizzat committed Sep 11, 2014
2 parents 931670f + 55e377b commit 29f5619
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 71 deletions.
124 changes: 71 additions & 53 deletions kafka/protocol.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -71,11 +71,13 @@ def _encode_message_set(cls, messages):
Offset => int64
MessageSize => int32
"""
message_set = b""
message_set = []
for message in messages:
encoded_message = KafkaProtocol._encode_message(message)
message_set += struct.pack('>qi%ds' % len(encoded_message), 0, len(encoded_message), encoded_message)
return message_set
message_set.append(struct.pack('>qi%ds' % len(encoded_message), 0,
len(encoded_message),
encoded_message))
return b''.join(message_set)

@classmethod
def _encode_message(cls, message):
Expand All @@ -95,9 +97,11 @@ def _encode_message(cls, message):
Value => bytes
"""
if message.magic == 0:
msg = struct.pack('>BB', message.magic, message.attributes)
msg += write_int_string(message.key)
msg += write_int_string(message.value)
msg = b''.join([
struct.pack('>BB', message.magic, message.attributes),
write_int_string(message.key),
write_int_string(message.value)
])
crc = crc32(msg)
msg = struct.pack('>I%ds' % len(msg), crc, msg)
else:
Expand Down Expand Up @@ -197,21 +201,24 @@ def encode_produce_request(cls, client_id, correlation_id,
payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads)

message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.PRODUCE_KEY)
message = []
message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.PRODUCE_KEY))

message += struct.pack('>hii', acks, timeout, len(grouped_payloads))
message.append(struct.pack('>hii', acks, timeout,
len(grouped_payloads)))

for topic, topic_payloads in grouped_payloads.items():
message += struct.pack('>h%dsi' % len(topic),
len(topic), topic, len(topic_payloads))
message.append(struct.pack('>h%dsi' % len(topic), len(topic), topic,
len(topic_payloads)))

for partition, payload in topic_payloads.items():
msg_set = KafkaProtocol._encode_message_set(payload.messages)
message += struct.pack('>ii%ds' % len(msg_set), partition,
len(msg_set), msg_set)
message.append(struct.pack('>ii%ds' % len(msg_set), partition,
len(msg_set), msg_set))

return struct.pack('>i%ds' % len(message), len(message), message)
msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)

@classmethod
def decode_produce_response(cls, data):
Expand Down Expand Up @@ -254,21 +261,23 @@ def encode_fetch_request(cls, client_id, correlation_id, payloads=None,
payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads)

message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.FETCH_KEY)
message = []
message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.FETCH_KEY))

# -1 is the replica id
message += struct.pack('>iiii', -1, max_wait_time, min_bytes,
len(grouped_payloads))
message.append(struct.pack('>iiii', -1, max_wait_time, min_bytes,
len(grouped_payloads)))

for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
message.append(write_short_string(topic))
message.append(struct.pack('>i', len(topic_payloads)))
for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.offset,
payload.max_bytes)
message.append(struct.pack('>iqi', partition, payload.offset,
payload.max_bytes))

return struct.pack('>i%ds' % len(message), len(message), message)
msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)

@classmethod
def decode_fetch_response(cls, data):
Expand Down Expand Up @@ -301,21 +310,23 @@ def encode_offset_request(cls, client_id, correlation_id, payloads=None):
payloads = [] if payloads is None else payloads
grouped_payloads = group_by_topic_and_partition(payloads)

message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_KEY)
message = []
message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_KEY))

# -1 is the replica id
message += struct.pack('>ii', -1, len(grouped_payloads))
message.append(struct.pack('>ii', -1, len(grouped_payloads)))

for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
message.append(write_short_string(topic))
message.append(struct.pack('>i', len(topic_payloads)))

for partition, payload in topic_payloads.items():
message += struct.pack('>iqi', partition, payload.time,
payload.max_offsets)
message.append(struct.pack('>iqi', partition, payload.time,
payload.max_offsets))

return struct.pack('>i%ds' % len(message), len(message), message)
msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)

@classmethod
def decode_offset_response(cls, data):
Expand Down Expand Up @@ -360,15 +371,17 @@ def encode_metadata_request(cls, client_id, correlation_id, topics=None,
else:
topics = payloads

message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.METADATA_KEY)
message = []
message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.METADATA_KEY))

message += struct.pack('>i', len(topics))
message.append(struct.pack('>i', len(topics)))

for topic in topics:
message += struct.pack('>h%ds' % len(topic), len(topic), topic)
message.append(struct.pack('>h%ds' % len(topic), len(topic), topic))

return write_int_string(message)
msg = b''.join(message)
return write_int_string(msg)

@classmethod
def decode_metadata_response(cls, data):
Expand Down Expand Up @@ -435,20 +448,22 @@ def encode_offset_commit_request(cls, client_id, correlation_id,
"""
grouped_payloads = group_by_topic_and_partition(payloads)

message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_COMMIT_KEY)
message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
message = []
message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_COMMIT_KEY))
message.append(write_short_string(group))
message.append(struct.pack('>i', len(grouped_payloads)))

for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
message.append(write_short_string(topic))
message.append(struct.pack('>i', len(topic_payloads)))

for partition, payload in topic_payloads.items():
message += struct.pack('>iq', partition, payload.offset)
message += write_short_string(payload.metadata)
message.append(struct.pack('>iq', partition, payload.offset))
message.append(write_short_string(payload.metadata))

return struct.pack('>i%ds' % len(message), len(message), message)
msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)

@classmethod
def decode_offset_commit_response(cls, data):
Expand Down Expand Up @@ -484,20 +499,23 @@ def encode_offset_fetch_request(cls, client_id, correlation_id,
payloads: list of OffsetFetchRequest
"""
grouped_payloads = group_by_topic_and_partition(payloads)
message = cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_FETCH_KEY)

message += write_short_string(group)
message += struct.pack('>i', len(grouped_payloads))
message = []
message.append(cls._encode_message_header(client_id, correlation_id,
KafkaProtocol.OFFSET_FETCH_KEY))

message.append(write_short_string(group))
message.append(struct.pack('>i', len(grouped_payloads)))

for topic, topic_payloads in grouped_payloads.items():
message += write_short_string(topic)
message += struct.pack('>i', len(topic_payloads))
message.append(write_short_string(topic))
message.append(struct.pack('>i', len(topic_payloads)))

for partition, payload in topic_payloads.items():
message += struct.pack('>i', partition)
message.append(struct.pack('>i', partition))

return struct.pack('>i%ds' % len(message), len(message), message)
msg = b''.join(message)
return struct.pack('>i%ds' % len(msg), len(msg), msg)

@classmethod
def decode_offset_fetch_response(cls, data):
Expand Down
2 changes: 2 additions & 0 deletions test/test_producer_integration.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,8 @@
import time
import uuid

from six.moves import range

from kafka import (
SimpleProducer, KeyedProducer,
create_message, create_gzip_message, create_snappy_message,
Expand Down
36 changes: 18 additions & 18 deletions test/test_protocol.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -453,31 +453,31 @@ def test_encode_metadata_request_with_topics(self):
self.assertEqual(encoded, expected)

def _create_encoded_metadata_response(self, brokers, topics):
encoded = struct.pack('>ii', 3, len(brokers))
encoded = []
encoded.append(struct.pack('>ii', 3, len(brokers)))
for broker in brokers:
encoded += struct.pack('>ih%dsi' % len(broker.host), broker.nodeId,
len(broker.host), broker.host, broker.port)
encoded.append(struct.pack('>ih%dsi' % len(broker.host),
broker.nodeId, len(broker.host),
broker.host, broker.port))

encoded += struct.pack('>i', len(topics))
encoded.append(struct.pack('>i', len(topics)))
for topic in topics:
encoded += struct.pack('>hh%dsi' % len(topic.topic),
topic.error, len(topic.topic),
topic.topic, len(topic.partitions))
encoded.append(struct.pack('>hh%dsi' % len(topic.topic),
topic.error, len(topic.topic),
topic.topic, len(topic.partitions)))
for metadata in topic.partitions:
encoded += struct.pack('>hiii',
metadata.error,
metadata.partition,
metadata.leader,
len(metadata.replicas))
encoded.append(struct.pack('>hiii', metadata.error,
metadata.partition, metadata.leader,
len(metadata.replicas)))
if len(metadata.replicas) > 0:
encoded += struct.pack('>%di' % len(metadata.replicas),
*metadata.replicas)
encoded.append(struct.pack('>%di' % len(metadata.replicas),
*metadata.replicas))

encoded += struct.pack('>i', len(metadata.isr))
encoded.append(struct.pack('>i', len(metadata.isr)))
if len(metadata.isr) > 0:
encoded += struct.pack('>%di' % len(metadata.isr),
*metadata.isr)
return encoded
encoded.append(struct.pack('>%di' % len(metadata.isr),
*metadata.isr))
return b''.join(encoded)

def test_decode_metadata_response(self):
node_brokers = [
Expand Down

0 comments on commit 29f5619

Please sign in to comment.