Currently, I'm using elastic search to store and query some logs. We set up a five node elastic search cluster. Among them two indexing nodes and three query nodes. In the indexing node, we have redis, logstash and elasticsearch on both two servers. The elasticsearch uses NFS storage as data store. Our requirement is to index 300 log entries/second. But the best performance I can get from elasticsearch is only 25 log entries/second! The XMX of the elasticsearch is 16G. Version of each component:
Redis: 2.8.12
logstash: 1.4.2
elasticsearch: 1.5.0
Our current index settings are like this:
{
"userlog" : {
"settings" : {
"index" : {
"index" : {
"store" : {
"type" : "mmapfs"
},
"translog" : {
"flush_threshold_ops" : "50000"
}
},
"number_of_replicas" : "1",
"translog" : {
"flush_threshold_size" : "1G",
"durability" : "async"
},
"merge" : {
"scheduler" : {
"max_thread_count" : "1"
}
},
"indexing" : {
"slowlog" : {
"threshold" : {
"index" : {
"trace" : "2s",
"info" : "5s"
}
}
}
},
"memory" : {
"index_buffer_size" : "3G"
},
"refresh_interval" : "30s",
"version" : {
"created" : "1050099"
},
"creation_date" : "1447730702943",
"search" : {
"slowlog" : {
"threshold" : {
"fetch" : {
"debug" : "500ms"
},
"query" : {
"warn" : "10s",
"trace" : "1s"
}
}
}
},
"indices" : {
"memory" : {
"index_buffer_size" : "30%"
}
},
"uuid" : "E1ttme3fSxKVD5kRHEr_MA",
"index_currency" : "32",
"number_of_shards" : "5"
}
}
}
}
Here's my logstash config:
input {
redis {
host => "eanprduserreporedis01.eao.abn-iad.ea.com"
port => "6379"
type => "redis-input"
data_type => "list"
key => "userLog"
threads => 15
}
# Second reids block begin
redis {
host => "eanprduserreporedis02.eao.abn-iad.ea.com"
port => "6379"
type => "redis-input"
data_type => "list"
key => "userLog"
threads => 15
}
# Second reids block end
}
output {
elasticsearch {
cluster => "customizedlog_prod"
index => "userlog"
workers => 30
}
stdout{}
}
A very strange thing is although currently the indexing speed is only ~20/s, the IO wait is very high almost 70%. And mostly are read traffic. Through nfsiostat, current read speed is about 200Mbps! So basically, to index every log entry, it will read about 10Mbits of data which is insane because the average length of our log entry is less than 10K. So, I took a jstack dump of the elastic search, here's the result of one RUNNING thread:
"elasticsearch[somestupidhostname][bulk][T#3]" daemon prio=10 tid=0x00007f230c109800 nid=0x79f6 runnable [0x00007f1ba85f0000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.FileDispatcherImpl.pread0(Native Method)
at sun.nio.ch.FileDispatcherImpl.pread(FileDispatcherImpl.java:52)
at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:220)
at sun.nio.ch.IOUtil.read(IOUtil.java:197)
at sun.nio.ch.FileChannelImpl.readInternal(FileChannelImpl.java:730)
at sun.nio.ch.FileChannelImpl.read(FileChannelImpl.java:715)
at org.apache.lucene.store.NIOFSDirectory$NIOFSIndexInput.readInternal(NIOFSDirectory.java:179)
at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:342)
at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:54)
at org.apache.lucene.store.DataInput.readVInt(DataInput.java:122)
at org.apache.lucene.store.BufferedIndexInput.readVInt(BufferedIndexInput.java:221)
at org.apache.lucene.codecs.blocktree.SegmentTermsEnumFrame.loadBlock(SegmentTermsEnumFrame.java:152)
at org.apache.lucene.codecs.blocktree.SegmentTermsEnum.seekExact(SegmentTermsEnum.java:506)
at org.elasticsearch.common.lucene.uid.PerThreadIDAndVersionLookup.lookup(PerThreadIDAndVersionLookup.java:104)
at org.elasticsearch.common.lucene.uid.Versions.loadDocIdAndVersion(Versions.java:150)
at org.elasticsearch.common.lucene.uid.Versions.loadVersion(Versions.java:161)
at org.elasticsearch.index.engine.InternalEngine.loadCurrentVersionFromIndex(InternalEngine.java:1002)
at org.elasticsearch.index.engine.InternalEngine.innerCreate(InternalEngine.java:277)
- locked <0x00000005fc76b938> (a java.lang.Object)
at org.elasticsearch.index.engine.InternalEngine.create(InternalEngine.java:256)
at org.elasticsearch.index.shard.IndexShard.create(IndexShard.java:455)
at org.elasticsearch.action.bulk.TransportShardBulkAction.shardIndexOperation(TransportShardBulkAction.java:437)
at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:149)
at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction.performOnPrimary(TransportShardReplicationOperationAction.java:515)
at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction$1.run(TransportShardReplicationOperationAction.java:422)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Can anyone tell me what is elastic search doing and why the indexing is so slow? And is it possible to improve it?