I am attempting to use kafka-node to read compacted messages from a kafka topic.
The problem is that recently inserted messages are left above the EOL and are not reachable until additional messages are inserted. Effectively, there is a gap between the EOL and the high water offset, which prevents reading the latest messages. It's not clear why this is.
A topic has been created with
kafka-topics.sh --zookeeper ${KAFKA_HOST}:2181 --create --topic atopic --config "cleanup.policy=compact" --config "delete.retention.ms=100" --config "segment.ms=100" --config "min.cleanable.dirty.ratio=0" --partitions 1 --replication-factor 1
A number of key values are produced into the topic. Some of the keys were the same.
// Connect to the broker and wrap the client in a high-level producer.
// `kafka`, `HighLevelProducer`, `payload` and `debug` are defined outside this snippet.
var client = new kafka.KafkaClient({kafkaHost: "<host:port>",autoConnect: true})
var producer = new HighLevelProducer(client);
// Send the payload; the callback receives an error (if any) and the send result.
producer.send(payload, function(error, result) {
// NOTE: this debug line runs before the error check, so it is logged even when the send failed.
debug('Sent payload to Kafka: ', payload);
if (error) {
console.error(error);
} else {
// `res` is not defined in this snippet — presumably the resolve callback of an enclosing Promise; confirm against the full code.
res(true)
}
// Close the client whether or not the send succeeded.
client.close()
});
});
Here are the keys and values inserted
key - 1
key2 - 1
key3 - 1
key - 2
key2 - 2
key3 - 2
key1 - 3
key - 3
key2 - 3
key3 - 3
Then the set of topic keys was requested.
// Options passed to the ConsumerGroup constructor below.
var options = {
id: 'consumer1',
kafkaHost: "<host:port>",
groupId: "consumergroup1",
sessionTimeout: 15000,
protocol: ['roundrobin'],
// Presumably selects the earliest available offset when the group has no committed offset — confirm against the kafka-node docs.
fromOffset: 'earliest'
};
// `ConsumerGroup` and `topic` are defined outside this snippet.
var consumerGroup = new ConsumerGroup(options, topic);
// Register the handlers declared below for error and message events.
consumerGroup.on('error', onError);
consumerGroup.on('message', onMessage);
// Close the consumer group when 'done' is emitted; the close callback is intentionally empty.
// NOTE(review): the `true` argument presumably forces an offset commit before closing — confirm.
consumerGroup.on('done', function(message) {
consumerGroup.close(true,function(){ });
})
// Error handler: writes the error to stderr.
function onError (error) {
console.error(error);
}
/**
 * Log one consumed message.
 *
 * Must be invoked with `this` bound to an object exposing `client.clientId`;
 * it is registered as an event listener in this snippet, so `this` is expected
 * to be the emitting consumer group.
 *
 * The format string has five placeholders but six values are passed:
 * `message.value` has no placeholder of its own, so `console.log` appends it
 * after the formatted text.
 *
 * @param {object} message - Consumed message; `topic`, `partition`, `offset`,
 *   `highWaterOffset` and `value` are read from it.
 */
function onMessage (message) {
  // Fix: the original header read `{)`, a stray parenthesis that is a syntax error.
  console.log('%s read msg Topic="%s" Partition=%s Offset=%d HW=%d', this.client.clientId, message.topic, message.partition, message.offset, message.highWaterOffset, message.value);
}
})
consumer1 read msg Topic="atopic" Partition=0 Offset=4 highWaterOffset=10 Key=key2 value={"name":"key2","url":"2"}
consumer1 read msg Topic="atopic" Partition=0 Offset=5 highWaterOffset=10 Key=key3 value={"name":"key3","url":"2"}
consumer1 read msg Topic="atopic" Partition=0 Offset=6 highWaterOffset=10 Key=key1 value={"name":"key1","url":"3"}
consumer1 read msg Topic="atopic" Partition=0 Offset=7 highWaterOffset=10 Key=key value={"name":"key","url":"3"}
consumer1 read msg Topic="atopic" Partition=0 Offset=0 highWaterOffset=10 Key= value=
consumer1 read msg Topic="atopic" Partition=0 Offset=0 highWaterOffset=10 Key= value=
consumer1 read msg Topic="atopic" Partition=0 Offset=0 highWaterOffset=10 Key= value=
consumer1 read msg Topic="atopic" Partition=0 Offset=0 highWaterOffset=10 Key= value=
There is a high water offset which represents the latest value of 10. However the offset value the consumer sees is only 7. Somehow the compaction prevents the consumer from seeing the latest messages.
It's not clear how to avoid this constraint and allow the consumer to see the latest messages.
Any suggestions appreciated. Thanks.
Your code prints `HW=%d`, and your output says `highWaterOffset`. If you have questions about the output of the code, it would make sense if you showed the code that generated it. In any case, the topic is compacted, and so you get to offset 10 by the producer, but there are only 7 values on the topic itself. – OneCricketeer