Commit 9ad0be6

Merge pull request #314 from dpkp/keyed_producer_failover

Handle keyed producer failover

2 parents: 60a7378 + 6ed6ad5

File tree: 6 files changed, +231 −145 lines

kafka/partitioner/base.py

Lines changed: 3 additions & 4 deletions

@@ -12,14 +12,13 @@ def __init__(self, partitions):
         """
         self.partitions = partitions
 
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
         """
         Takes a string key and num_partitions as argument and returns
         a partition to be used for the message
 
         Arguments:
-            partitions: The list of partitions is passed in every call. This
-                        may look like an overhead, but it will be useful
-                        (in future) when we handle cases like rebalancing
+            key: the key to use for partitioning
+            partitions: (optional) a list of partitions.
         """
         raise NotImplementedError('partition function has to be implemented')
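For context, a subclass of the updated interface only needs to honor the now-optional partitions argument. A minimal sketch (the FirstPartitioner class below is illustrative, not part of this commit):

from kafka.partitioner.base import Partitioner

class FirstPartitioner(Partitioner):
    """Illustrative subclass: always picks the first partition."""
    def partition(self, key, partitions=None):
        # Fall back to the list captured in __init__, mirroring the
        # fallback this commit adds to HashedPartitioner below.
        if not partitions:
            partitions = self.partitions
        return partitions[0]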

kafka/partitioner/hashed.py

Lines changed: 3 additions & 1 deletion

@@ -5,7 +5,9 @@ class HashedPartitioner(Partitioner):
     Implements a partitioner which selects the target partition based on
     the hash of the key
     """
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
+        if not partitions:
+            partitions = self.partitions
         size = len(partitions)
         idx = hash(key) % size
 
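With the fallback in place, calling the partitioner without a partition list uses the list it was constructed with. A small usage sketch (the partition ids are made up):

from kafka.partitioner.hashed import HashedPartitioner

partitioner = HashedPartitioner([0, 1, 2])
partitioner.partition('mykey')             # uses the stored [0, 1, 2]
partitioner.partition('mykey', [0, 1, 2])  # explicit list still accepted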

kafka/partitioner/roundrobin.py

Lines changed: 2 additions & 2 deletions

@@ -15,9 +15,9 @@ def _set_partitions(self, partitions):
         self.partitions = partitions
         self.iterpart = cycle(partitions)
 
-    def partition(self, key, partitions):
+    def partition(self, key, partitions=None):
         # Refresh the partition list if necessary
-        if self.partitions != partitions:
+        if partitions and self.partitions != partitions:
             self._set_partitions(partitions)
 
         return next(self.iterpart)
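The extra guard means a call without a fresh partition list no longer resets the cycle mid-rotation. A sketch of the intended behavior, assuming the module's RoundRobinPartitioner class (partition ids are made up):

from kafka.partitioner.roundrobin import RoundRobinPartitioner

partitioner = RoundRobinPartitioner([0, 1])
partitioner.partition('a')          # 0 -- cycles through the stored list
partitioner.partition('b')          # 1
partitioner.partition('c', [0, 1])  # same list, so no reset: back to 0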

kafka/producer/keyed.py

Lines changed: 1 addition & 1 deletion

@@ -54,7 +54,7 @@ def _next_partition(self, topic, key):
             self.partitioners[topic] = self.partitioner_class(self.client.get_partition_ids_for_topic(topic))
 
         partitioner = self.partitioners[topic]
-        return partitioner.partition(key, self.client.get_partition_ids_for_topic(topic))
+        return partitioner.partition(key)
 
     def send_messages(self,topic,key,*msg):
         partition = self._next_partition(topic, key)
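From the caller's side nothing changes; the producer still maps each key to a partition via its cached partitioner. A usage sketch mirroring the new integration test below (the broker address is illustrative):

from kafka import KafkaClient
from kafka.producer import KeyedProducer

client = KafkaClient(['localhost:9092'])
producer = KeyedProducer(client, async=False)
producer.send_messages('my-topic', 'my-key', 'some message')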

test/test_failover_integration.py

Lines changed: 62 additions & 15 deletions

@@ -7,6 +7,7 @@
 from kafka import KafkaClient, SimpleConsumer
 from kafka.common import TopicAndPartition, FailedPayloadsError, ConnectionError
 from kafka.producer.base import Producer
+from kafka.producer import KeyedProducer
 
 from test.fixtures import ZookeeperFixture, KafkaFixture
 from test.testutil import (
@@ -17,8 +18,7 @@
 class TestFailover(KafkaIntegrationTestCase):
     create_client = False
 
-    @classmethod
-    def setUpClass(cls):  # noqa
+    def setUp(self):
         if not os.environ.get('KAFKA_VERSION'):
             return
 
@@ -27,33 +27,41 @@ def setUpClass(cls):  # noqa
         partitions = 2
 
         # mini zookeeper, 2 kafka brokers
-        cls.zk = ZookeeperFixture.instance()
-        kk_args = [cls.zk.host, cls.zk.port, zk_chroot, replicas, partitions]
-        cls.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]
+        self.zk = ZookeeperFixture.instance()
+        kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
+        self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]
 
-        hosts = ['%s:%d' % (b.host, b.port) for b in cls.brokers]
-        cls.client = KafkaClient(hosts)
+        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
+        self.client = KafkaClient(hosts)
+        super(TestFailover, self).setUp()
 
-    @classmethod
-    def tearDownClass(cls):
+    def tearDown(self):
+        super(TestFailover, self).tearDown()
         if not os.environ.get('KAFKA_VERSION'):
             return
 
-        cls.client.close()
-        for broker in cls.brokers:
+        self.client.close()
+        for broker in self.brokers:
             broker.close()
-        cls.zk.close()
+        self.zk.close()
 
     @kafka_versions("all")
     def test_switch_leader(self):
         topic = self.topic
         partition = 0
 
-        # Test the base class Producer -- send_messages to a specific partition
+        # Testing the base Producer class here so that we can easily send
+        # messages to a specific partition, kill the leader for that partition
+        # and check that after another broker takes leadership the producer
+        # is able to resume sending messages
+
+        # require that the server commit messages to all in-sync replicas
+        # so that failover doesn't lose any messages on server-side
+        # and we can assert that server-side message count equals client-side
         producer = Producer(self.client, async=False,
                             req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)
 
-        # Send 10 random messages
+        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)
 
         # kill leader for partition
@@ -80,7 +88,7 @@ def test_switch_leader(self):
         self._send_random_messages(producer, topic, partition, 100)
 
         # count number of messages
-        # Should be equal to 10 before + 1 recovery + 10 after
+        # Should be equal to 100 before + 1 recovery + 100 after
         self.assert_message_count(topic, 201, partitions=(partition,))
 
 
@@ -116,6 +124,45 @@ def test_switch_leader_async(self):
         # Should be equal to 10 before + 1 recovery + 10 after
         self.assert_message_count(topic, 21, partitions=(partition,))
 
+    @kafka_versions("all")
+    def test_switch_leader_keyed_producer(self):
+        topic = self.topic
+
+        producer = KeyedProducer(self.client, async=False)
+
+        # Send 10 random messages
+        for _ in range(10):
+            key = random_string(3)
+            msg = random_string(10)
+            producer.send_messages(topic, key, msg)
+
+        # kill leader for partition 0
+        self._kill_leader(topic, 0)
+
+        recovered = False
+        started = time.time()
+        timeout = 60
+        while not recovered and (time.time() - started) < timeout:
+            try:
+                key = random_string(3)
+                msg = random_string(10)
+                producer.send_messages(topic, key, msg)
+                if producer.partitioners[topic].partition(key) == 0:
+                    recovered = True
+            except (FailedPayloadsError, ConnectionError):
+                logging.debug("caught exception sending message -- will retry")
+                continue
+
+        # Verify we successfully sent the message
+        self.assertTrue(recovered)
+
+        # send some more messages just to make sure no more exceptions
+        for _ in range(10):
+            key = random_string(3)
+            msg = random_string(10)
+            producer.send_messages(topic, key, msg)
+
+
     def _send_random_messages(self, producer, topic, partition, n):
         for j in range(n):
             logging.debug('_send_random_message to %s:%d -- try %d', topic, partition, j)
