1
votes

In the project, I have a loop going through a list of urls. It downloads file from every url and do some post process over the downloaded file.

After the all the process done (both download process and post process), I want to execute a callback function. Because post process includes some streaming task, it has close event. If the last item can be identified, I can pass the callback function to the close event. However, since the loop is async, I can't track which item is done at last.

For now, I use a 5 second timeout to make sure the callback is executed after the whole process. Obviously, this is not sustainable. What's a good way to handle this?

loop code:

exports.processArray = (items, process, callback) => {
    var todo = items.concat();
    setTimeout(function() {
        process(todo.shift());
        if(todo.length > 0) {
          // execute download and post process each second
          // however it doesn't guarantee one start after previous one done
          setTimeout(arguments.callee, 1000);
        } else {
          setTimeout(() => {callback();}, 5000);
        }
    }, 1000);
};

processArray(
  // First param, the array
  urlList,
  // Second param, download and post process
  (url) => {
    if(url.startsWith('http')) {
      getDataReg(url, uid);
    }
    else if(url.startsWith('ftp')) {
      getDataFtp(url, uid);
    }
    else {
      console.log('not a valid resource');
    }
  },
  // Third param, callback to be executed after all done
  () => {
    Request.get(`${config.demouri}bound=${request.query.boundary};uid=${uid}`, {
      method: 'GET',
      auth: auth
    })
    .on('response', (response) => {
      console.log('response event emmits');
      zipFiles(uid)
      .then((path) => {
        reply.file(path, { confine: false, filename: uid + '.zip', mode: 'inline'}).header('Content-Disposition');
      });
    });
  }
);

Download and post process:

exports.getDataFtp = (url, uid) => {
  console.log('get into ftp');
  var usefulUrl = url.split('//')[1];
  var spliter = usefulUrl.indexOf('/');
  var host = usefulUrl.substring(0, spliter);
  var dir = usefulUrl.substring(spliter+1, usefulUrl.length);
  var client = new ftp();
  var connection = {
    host: host
  };
  var fileNameStart = dir.lastIndexOf('/') + 1;
  var fileNameEnd = dir.length;
  var fileName = dir.substring(fileNameStart, fileNameEnd);
  console.log('filename: ', fileName);

  client.on('ready', () => {
    console.log('get into ftp ready');
    client.get(dir, (err, stream) => {
      if (err) {
        console.log('get file err:', err);
        return;
      } else{
        console.log('get into ftp get');
        stream.pipe(fs.createWriteStream(datadir + `download/${uid}/${fileName}`));
        stream.on('end', () => {
          console.log('get into ftp close');
          unzipData(datadir + `download/${uid}/`, fileName, uid);
          client.end();
        });
      }
    });
  });
  client.connect(connection);
};

exports.getDataReg = (url, uid) => {
  console.log('get into http');
    var fileNameStart = url.lastIndexOf('/') + 1;
  var fileNameEnd = url.length;
  var fileName = url.substring(fileNameStart, fileNameEnd);
    var file = fs.createWriteStream(datadir + `download/${uid}/${fileName}`);
    if (url.startsWith('https')) {
    https.get(url, (response) => {
      console.log('start piping file');
      response.pipe(file);
      file.on('finish', () => {
        console.log('get into http finish');
        unzipData(datadir + `download/${uid}/`, fileName, uid);
      });
    }).on('error', (err) => { // Handle errors
      fs.unlink(datadir + `download/${uid}/${fileName}`);
      console.log('download file err: ', err);
    });
    } else {
    http.get(url, (response) => {
      console.log('start piping file');
      response.pipe(file);
      file.on('finish', () => {
        unzipData(datadir + `download/${uid}/`, fileName, uid);
      });
    }).on('error', (err) => {
      fs.unlink(datadir + `download/${uid}/${fileName}`);
      console.log('download file err: ', err);
    });
    }
};

function unzipData(path, fileName, uid) {
  console.log('get into unzip');
  console.log('creating: ', path + fileName);
    fs.createReadStream(path + fileName)
    .pipe(unzip.Extract({path: path}))
    .on('close', () => {
    console.log('get into unzip close');
    var filelist = listFile(path);
    filelist.forEach((filePath) => {
      if (!filePath.endsWith('.zip')) {
        var components = filePath.split('/');
        var component = components[components.length-1];
        mv(filePath, datadir + `processing/${uid}/${component}`, (err) => {
          if(err) {
            console.log('move file err: ');
          } else {
            console.log('move file done');
          }
        });
      }
    });
    fs.unlink(path + fileName, (err) => {});
    });
}
3
handle it by handling asynchronous code "correctly" - use callbacks or promisesJaromanda X
You can use Promise constructor within processArray() call. Not sure why nodejs has not implemented the streams standard to use Promise instead of callback. This Question has appeared several times over the past week or so.guest271314
Could you answer the question with code example?zhangjinzhou
If I use callback, when should I execute the callback? @JaromandaXzhangjinzhou

3 Answers

1
votes

What you want to do is to make all your asynchronous processes converge into a single promise that you can use to execute the callback at the correct moment.

Lets start at the point each process is complete, which I assume is in the callback passed to the mv() function in unzipData(). You want to wrap each of these asynchronous actions in a Promise that resolves in the callback and you also want to use these promises later and for that you use the .map() method to collect the promises in an array (instead of .forEach()).
Here's the code:

var promises = filelist.map((filePath) => {
  if (!filePath.endsWith('.zip')) {
    var components = filePath.split('/');
    var component = components[components.length-1];
    return new Promise((resolve, reject) =>
      mv(filePath, datadir + `processing/${uid}/${component}`, (err) => {
        if(err) {
          console.log('move file err: ');
          reject(); // Or resolve() if you want to ignore the error and not cause it to prevent the callback from executing later
        } else {
          console.log('move file done');
          resolve();
        }
      }));
  }
  return Promise.resolve();
});

(if the asynchronous action is not to be executed, a Promise that resolves immediately is returned instead)

Now, we can turn this list of Promises into a single Promise that resolves when all of the promises in the list has resolved:

var allPromise = Promise.all(promises);

Next, we need to look further up in the code. We can see that the code we've just been looking at is itself part of an event handler of an asynchronous action, i.e. fs.createReadStream(). You need to wrap that in a promise that gets resolved when the inner promises resolve and this is the promise that the unzipData() function shall return:

function unzipData(path, fileName, uid) {
  console.log('get into unzip');
  console.log('creating: ', path + fileName);
  return new Promise((outerResolve) =>
    fs.createReadStream(path + fileName)
    .pipe(unzip.Extract({path: path}))
    .on('close', () => {
      console.log('get into unzip close');
      var filelist = listFile(path);

      // Code from previous examples

      allPromise.then(outerResolve);
    }));
}

Next, we look at the functions that use unzipData(): getDataReg() and getDataFtp(). They only perform one asynchronous action so all you need to do is to make them return a promise that resolves when the promise returned by unzipData() resolves.
Simplified example:

exports.getDataReg = (url, uid) => {
  return new Promise((resolve, reject) => {

    // ...

    https.get(url, (response) => {
      response.pipe(file);
      file.on('finish', () => {
        unzipData(datadir + `download/${uid}/`, fileName, uid)
          .then(resolve);
      });
    }).on('error', (err) => { // Handle errors
      fs.unlink(datadir + `download/${uid}/${fileName}`);
      reject(); // Or resolve() if you want to ignore the error and not cause it to prevent the callback from executing later
    });

    // ...

  });
}

Finally, we get to the processArray() function and here you need to do the same thing we did to begin with: map the processes into a list of promises. First, the process function passed needs to return the promises returned by getDataReg() and getDataFtp():

// Second param, download and post process
(url) => {
  if(url.startsWith('http')) {
    return getDataReg(url, uid);
  }
  else if(url.startsWith('ftp')) {
    return getDataFtp(url, uid);
  }
  else {
    console.log('not a valid resource');
  }
  return Promise.reject(); // or Promise.resolve() if you want invalid resources to be ignored and not prevent the callback from executing later
}

Now, your processArray() function can look like this:

exports.processArray = (items, process, callback) =>
  Promise.all(items.map(process))
    .then(callback)
    .catch(() => console.log('Something went wrong somewhere'));

Your callback will get invoked when all asynchronous actions have completed, regardless of in which order they do. If any one of the promises rejects, the callback will never be executed so manage your promise rejections accordingly.

Here's a JSFiddle with the complete code: https://jsfiddle.net/upn4yqsw/

2
votes

After the all the process done (both download process and post process), I want to execute a callback function.

The interesting thing about a series of asynchronous processes is that you can never know when exactly all processes will complete. So setting a timeout for the callback is quick&dirty way to do it, but it's not reliable for sure.

You can instead use a counter to solve this problem. Let's say you have 10 operations to perform. At the beginning you set your counter to ten counter = 10 And after each process is completed, regardless how (it can either succeed or fail), you can decrement the counter by 1 like counter -= 1 and right after it you can check if the counter is 0, if so that means all processes are completed and we reached the end. You can now safely run your callback function, like if(counter === 0) callback();


If I were you, I would do something like this:

*Notice that the called process should return a promise, so that I can know when it finishes (again regardless how)

*If you need help about promises, this useful article might help you: https://howtonode.org/promises

*Oh and one more thing, you should avoid using arguments.callee, because it's deprecated. Here is why Why was the arguments.callee.caller property deprecated in JavaScript?

exports.processArray = (items, process, callback) => {
    var todo = [].concat(items);
    var counter = todo.length;

    runProcess();

    function runProcess() {
      // Check if the counter already reached 0
      if(checkCounter() === false) {
        // Nope. Counter is still > 0, which means we got work to do.
        var processPromise = process(todo.shift());

        processPromise
          .then(function() {
            // success
          })
          .catch(function() {
            // failure
          })
          .finally(function() {
            // The previous process is done. 
            // Now we can go with the next one.
            --counter;
            runProcess();
          })
      }
    };

    function checkCounter() {
      if(counter === 0) {
        callback();
        return true;
      } else {
        return false;
      }
    }
};
0
votes

In general, since nodejs does not appear to have implemented Streams Standard to be Promise based, at least from what can gather; but rather, uses an event based or callback mechanism, you can use Promise constructor within function call, to return a fulfilled Promise object when a specific event has been dispatched

const doStuff = (...args) => new Promise((resolve, reject)) => {
  /* define and do stream stuff */
  doStreamStuff.on(/* "close", "end" */, => {
    // do stuff
    resolve(/* value */)
  })
});

doStuff(/* args */)
.then(data => {})
.catch(err => {})