I'm using elasticsearch 6.4. We index about 100M documents with a node-js loader using the client 15.2.0.
The results are weird: after every indexing run we get a different number of documents.
The code creates a batch and, after it reaches a certain size, loads it into Elasticsearch with the bulk API. To be more performant we disable the refresh. If the bulk is rejected we wait 20 seconds and try again.
We also checked response.errors (true/false), assuming that response.errors = false means there are no failures.
Here is the code:
// Flushes the accumulated `batch` to Elasticsearch once it reaches
// options.batchSize documents, then resets the batch and counter.
// NOTE(review): this block sits inside an outer loop/callback that is not
// visible here; it reads the outer variables i, options, batch, client,
// start, indexed, and the `sleep` and `async` modules.
if (i % options.batchSize === 0) {
var previous_start = new Date();
//sleep.msleep(options.slowdown);
async.waterfall([
// Step 1: send the bulk request (with one blocking retry on failure).
function (callback) {
client.bulk(
{
refresh: "false", //we do refresh only at the end
//requestTimeout: 200000,
body: batch
},
function (err, resp) {
if (err) {
console.log(err.message);
// NOTE(review): throwing inside an async callback is NOT caught by
// async.waterfall's final handler -- it becomes an uncaught exception.
throw err;
// NOTE(review): resp.errors === true does NOT mean the whole bulk was
// rejected; it means at least one item in resp.items failed while the
// rest were indexed successfully. Re-sending the entire batch therefore
// re-indexes the already-successful documents (with explicit _id this
// produces updates, which Lucene records as deleted+reinserted docs --
// the growing docs.deleted column in _cat/indices). Inspect resp.items
// and retry only the failed items instead.
} else if (resp.errors) {
console.log('Bulk is rejected... let\'s medidate');
// let's just wait and re-send the bulk request with increased
// timeout to be on the safe side
console.log("Waiting for 20 seconds");
// NOTE(review): sleep.msleep blocks the entire Node event loop for 20s.
sleep.msleep(20000); // -> this is blocking... time for elasticsearch to do whatever it does
// and be in a better mood to accept this bulk
// NOTE(review): by the time this error callback runs, the code after
// async.waterfall below has already executed `batch = []`, so this
// retry sends the NEW (empty or partially refilled) batch, not the one
// that failed -- the failed documents are silently lost. Capture the
// batch in a local variable before the first client.bulk call to fix
// this; it is the likely cause of the varying document counts.
client.bulk(
{
refresh: "false",
//requestTimeout: 200000,
body: batch
},
function (err, resp) {
if (err) {
console.log(err.message);
throw err;
} else if (resp.errors) {
console.log(resp);
// NOTE(review): throws a plain response object, not an Error, and
// again is not catchable by the waterfall's final handler.
throw resp;
// alternative would be to block again and resend
}
console.log("bulk is finally ingested...");
let theEnd = new Date();
return callback(null, theEnd);
});
} else {
let theEnd = new Date();
return callback(null, theEnd);
}
});
},
// Step 2: log cumulative count and per-batch throughput.
function(end, callback) {
let total_time = (end - start) / 1000;
let intermediate_time = (end - previous_start) / 1000;
// NOTE(review): assumes the full batch was indexed; with partial bulk
// failures (resp.errors) this counter over-counts the loaded records.
indexed += options.batchSize;
console.log('Loaded %s records in %d s (%d record/s)', indexed, total_time, options.batchSize / intermediate_time);
return callback(null, total_time);
}
],
function (err, total_time) {
if (err)
console.log(err);
});
// NOTE(review): these run immediately (the waterfall above is asynchronous),
// so the batch is cleared before the bulk response arrives -- see the retry
// note above.
batch = [];
i = 0;
}
});
Looks like we have some silent failures. Does anybody have the same issue? Any suggestions?
Moreover, when calling http://localhost:9200/_cat/indices?v, I get strange results in the docs.deleted column — what does that column mean?
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
green open index_name Cqc2ABpRRs23P1DKlgaKJQ 5 0 96450728 340 24.8gb 24.8gb
sometimes this number changes during indexing, like:
green open index_name WsJPPQLcTuuiw37Vv0lfhA 5 0 21958048 6594 6.6gb 6.6gb
(then this number decreases)
I could not find any explanation for it — any help on this would be appreciated.
Thank you in advance