29
votes

I'm using the node-cron module for scheduling tasks in a Node.js application. I also want to run the application in several processes using the core cluster module.

Running the application in several processes results in the scheduled tasks being executed in each process (e.g. if the task was to send an email, the email would be sent multiple times).

What are the best practices/possible ways of running a cron job along with the cluster module? Should I create a separate process which handles only the cron job and does not accept any requests? If yes, how can I do that in the right way?

5
You can try using a special process that checks a flag in the database, so you can see whether someone has already taken that job and avoid taking it again. It should take only a few lines and a very small table in the database (or a row if you already have a configuration table). — Gepser
Yes, that might be a solution. Maybe there are some example articles on that topic? Actually, I would like to create a separate process that would handle only cron job execution. Is that possible? — epidemiya30

5 Answers

35
votes

If you are using PM2, you can use an environment variable provided by PM2 itself called NODE_APP_INSTANCE, which requires PM2 2.5 or greater.

The NODE_APP_INSTANCE environment variable can be used to tell the processes apart; for example, if you want to run a cron job on only one process, you can just do this:

// PM2 exposes a per-process number in NODE_APP_INSTANCE; checking for
// instance 0 selects a single process. Environment variables are strings,
// hence the strict comparison with '0'.
if (process.env.NODE_APP_INSTANCE === '0') {
    // schedule your cron job here since this part will be executed for only one cluster
}

This works because two processes can never have the same number.

More Info on PM2 official doc here.

18
votes

After some research I ended up with the "Distributed locks using Redis" solution. There is a node module for that: node-redis-warlock.

Hope this answer will be useful for someone else.

UPDATE. Minimal sample code:

// Distributed-lock helper built on top of redis
const Warlock = require('node-redis-warlock');

// Establish a redis client; from here on `redis` names the client instance
const redis = require('redis').createClient();

// and pass it to warlock
const warlock = new Warlock(redis);

/**
 * Tries to take the lock named `key` through the module-level `warlock`
 * instance and, if the lock was obtained, hands the `unlock` function to
 * `callback` after a one-second delay.
 *
 * @param {string} key - Name of the lock to acquire.
 * @param {function(Function): void} callback - Receives `unlock`; it is
 *     expected to call `unlock()` once its work is done.
 */
function executeOnce (key, callback) {
    // 20000 is passed as the second argument of warlock.lock
    // (presumably the lock TTL in milliseconds; confirm against the warlock docs).
    warlock.lock(key, 20000, function (err, unlock) {
        if (err) {
            // Something went wrong and we weren't able to set a lock.
            // Report it instead of returning silently, so a failing lock
            // backend does not go unnoticed.
            console.error('executeOnce: unable to set lock "' + key + '"', err);
            return;
        }

        // Without a usable `unlock` function there is nothing to run here.
        if (typeof unlock !== 'function') {
            return;
        }

        // NOTE(review): the 1000 ms delay is kept from the original; its
        // purpose is not stated in the source.
        setTimeout(function () {
            callback(unlock);
        }, 1000);
    });
}

// Work protected by the lock; releases the lock as its last step.
function runLockedWork (unlock) {
    // Do here any stuff that should be done only once...
    unlock();
}

// Executes call back only once
executeOnce('every-three-hours-lock', runLockedWork);

UPDATE 2. More detailed example:

const CronJob = require('cron').CronJob;
const Warlock = require('node-redis-warlock');
const redis = require('redis').createClient();
const warlock = new Warlock(redis);
const async = require('async');

/**
 * Tries to take the lock named `key` through the module-level `warlock`
 * instance and, if the lock was obtained, hands the `unlock` function to
 * `callback` after a one-second delay.
 *
 * @param {string} key - Name of the lock to acquire.
 * @param {function(Function): void} callback - Receives `unlock`; it is
 *     expected to call `unlock()` once its work is done.
 */
function executeOnce (key, callback) {
    // 20000 is passed as the second argument of warlock.lock
    // (presumably the lock TTL in milliseconds; confirm against the warlock docs).
    warlock.lock(key, 20000, (err, unlock) => {
        if (err) {
            // Something went wrong and we weren't able to set a lock.
            // Report it instead of returning silently, so a failing lock
            // backend does not go unnoticed.
            console.error(`executeOnce: unable to set lock "${key}"`, err);
            return;
        }

        // Without a usable `unlock` function there is nothing to run here.
        if (typeof unlock !== 'function') {
            return;
        }

        // NOTE(review): the 1000 ms delay is kept from the original; its
        // purpose is not stated in the source.
        setTimeout(() => {
            callback(unlock);
        }, 1000);
    });
}

/**
 * Runs the tasks of the every-minute job through `async.parallel` and
 * releases the lock from the final callback.
 *
 * @param {Function} unlock - Lock release function; called in the final
 *     callback whether or not an error was reported.
 */
function everyMinuteJobTasks (unlock) {
    const tasks = [
        sendEmailNotifications,
        updateSomething,
        // etc...
    ];

    // Final callback: log a reported error, then release the lock.
    const onTasksFinished = (err) => {
        if (err) {
            logger.error(err);
        }

        unlock();
    };

    async.parallel(tasks, onTasksFinished);
}

// Cron job for the '*/1 * * * *' schedule. Each tick goes through
// executeOnce with the 'every-minute-lock' key before running the tasks.
let everyMinuteJob = new CronJob({
    cronTime: '*/1 * * * *',
    start: true,
    runOnInit: true,
    onTick () {
        executeOnce('every-minute-lock', everyMinuteJobTasks);
    }
});

/* Actual tasks */
/**
 * Task placeholder: send e-mail notifications.
 *
 * The template now signals completion by default. A task that never calls
 * `done` would keep the final callback of `async.parallel` (and therefore
 * `unlock()`) from ever running.
 *
 * @param {function(Error=): void} done - Completion callback; call it with
 *     an error if the task failed.
 */
function sendEmailNotifications (done) {
    // Do stuff here
    // Call done() when finished or call done(err) if error occurred
    done();
}

/**
 * Task placeholder: update something.
 *
 * The template now signals completion by default. A task that never calls
 * `done` would keep the final callback of `async.parallel` (and therefore
 * `unlock()`) from ever running.
 *
 * @param {function(Error=): void} done - Completion callback; call it with
 *     an error if the task failed.
 */
function updateSomething (done) {
    // Do stuff here
    // Call done() when finished or call done(err) if error occurred
    done();
}

// etc...
3
votes

I actually do not like the redis approach that is also used in the cron-cluster npm plugin, because I do not want to have a redis server running on my machine and have to maintain it, too.

I would like to discuss this approach with you:

Pro: we do not need to use redis. Con: the cron jobs are always running on the same worker.

I use message passing only for this; if you also use it for other things, you will want to include information in the message that identifies its purpose (here, the `order` field).

// Master/worker split: the master forks the workers and appoints exactly one
// of them as "main worker", which is told through a message to start the
// cron jobs. Workers wait for that message.
if (cluster.isMaster) {
    // Count the machine's CPUs
    const cpuCount = require('os').cpus().length;

    // Create a worker for each CPU
    for (let i = 0; i < cpuCount; i += 1) {
        cluster.fork();
    }

    cluster.on('fork', (worker) => {
        console.log("cluster forking new worker", worker.id);
    });

    // have a mainWorker that does the cron jobs.
    let mainWorkerId = null;

    // The first worker to report 'listening' while no main worker is set
    // gets the role and is told to start the cron jobs.
    cluster.on('listening', (worker, address) => {
        console.log("cluster listening new worker", worker.id);
        if (null === mainWorkerId) {
            console.log("Making worker " + worker.id + " to main worker");
            mainWorkerId = worker.id;
            worker.send({order: "startCron"});
        }
    });

    // Listen for dying workers if the mainWorker dies, make a new mainWorker
    cluster.on('exit', function (worker, code, signal) {
        console.log('Worker %d died :(', worker.id);

        if (worker.id === mainWorkerId) {
            console.log("Main Worker is dead...");
            // Clearing the id lets the next worker that reports 'listening'
            // (e.g. the replacement forked below) take over the role.
            mainWorkerId = null;
        }

        console.trace("I am here");
        console.log(worker);
        console.log(code);
        console.log(signal);
        cluster.fork();
    });
// Code to run if we're in a worker process
} else {

    // other code like setup app and stuff

    const doCron = function() {
        // setup cron jobs...
    };

    // Receive messages from the master process.
    process.on('message', function(msg) {
        // The parameter is `msg`; the original body read an undefined
        // `message` variable and threw a ReferenceError on every message.
        console.log('Worker ' + process.pid + ' received message from master.', msg);
        // Guard against non-object messages before reading `order`.
        if (msg && msg.order === "startCron") {
            doCron();
        }
    });
}
1
votes

I also had a problem with the cluster module, and finally I found a simple way to solve it.

Let the master process execute the cron job.

My project uses Kue to manage jobs. When the cron job runs, I get a list of jobs.

index.js

// Exposed as a global because cron.js reads `cluster` without requiring it.
global.cluster = require('cluster');

if (cluster.isMaster) {
  // Master: fork one worker per CPU.
  const cpuCount = require('os').cpus().length;
  for (let i = 0; i < cpuCount; i += 1) {
    cluster.fork();
  }

  // The cluster-level 'exit' event is only emitted in the master, so the
  // handler is registered here instead of in every process.
  // NOTE(review): `logger` is not defined in this snippet; presumably a
  // project-wide logger. Confirm it is in scope.
  cluster.on('exit', (worker) => {
    logger.warn('Worker %d died :(', worker.id);
    // Replace the dead worker.
    cluster.fork();
  });
} else {
  // start your express server here
  require('./server');
}

cron.js

const cron = require('cron').CronJob;

// `cluster` is the global that index.js installs (global.cluster).
// Only the master builds and starts the job; `job` stays null in a worker,
// so workers no longer run a timer whose tick does nothing.
const job = cluster.isMaster
  ? new cron('* * * * *', async () => {
      console.log('cron trigger');
    })
  : null;

if (job) {
  job.start();
}

Hope this helps.

1
votes

I think you can use the Node cluster module, and there you can write the code that should run in the master process only:

const cluster = require('cluster');

// `cluster.isMaster` tells the master process apart from the workers.
const runningAsMaster = cluster.isMaster;

if (runningAsMaster) {
     // Write your code which you want to execute in the master cluster only
}

This is the Node way to handle clustering; of course, you can also use a tool like pm2 to handle this.