316
votes

Any ideas on an async directory search using fs.readdir? I realize that we could introduce recursion and call the read directory function with the next directory to read, but I'm a little worried about it not being async...

Any ideas? I've looked at node-walk which is great, but doesn't give me just the files in an array, like readdir does. Although

Looking for output like...

['file1.txt', 'file2.txt', 'dir/file3.txt']
30

30 Answers

409
votes

There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves onto the next iteration - this guarantees that every iteration of the loop completes in order. In a parallel loop, all the iterations are started at the same time, and one may complete before another, however, it is much faster than a serial loop. So in this case, it's probably better to use a parallel loop because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).

A parallel loop would look like this:

var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var pending = list.length;
    if (!pending) return done(null, results);
    list.forEach(function(file) {
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            if (!--pending) done(null, results);
          });
        } else {
          results.push(file);
          if (!--pending) done(null, results);
        }
      });
    });
  });
};

A serial loop would look like this:

var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var i = 0;
    (function next() {
      var file = list[i++];
      if (!file) return done(null, results);
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};

And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):

walk(process.env.HOME, function(err, results) {
  if (err) throw err;
  console.log(results);
});

EDIT: Improved examples.

237
votes

This one uses the maximum amount of new, buzzwordy features available in node 8, including Promises, util/promisify, destructuring, async-await, map+reduce and more, making your co-workers scratch their heads as they try to figure out what is going on.

Node 8+

No external dependencies.

const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);

async function getFiles(dir) {
  const subdirs = await readdir(dir);
  const files = await Promise.all(subdirs.map(async (subdir) => {
    const res = resolve(dir, subdir);
    return (await stat(res)).isDirectory() ? getFiles(res) : res;
  }));
  return files.reduce((a, f) => a.concat(f), []);
}

Usage

getFiles(__dirname)
  .then(files => console.log(files))
  .catch(e => console.error(e));

Node 10.10+

Updated for node 10+ with even more whizbang:

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return Array.prototype.concat(...files);
}

Note that starting with node 11.15.0 you can use files.flat() instead of Array.prototype.concat(...files) to flatten the files array.

Node 11+

If you want to blow everybody's head up completely, you can use the following version using async iterators. In addition to being really cool, it also allows consumers to pull results one-at-a-time, making it better suited for really large directories.

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function* getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const res = resolve(dir, dirent.name);
    if (dirent.isDirectory()) {
      yield* getFiles(res);
    } else {
      yield res;
    }
  }
}

Usage has changed because the return type is now an async iterator instead of a promise

;(async () => {
  for await (const f of getFiles('.')) {
    console.log(f);
  }
})()

In case somebody is interested, I've written more about async iterators here: https://qwtel.com/posts/software/async-generators-in-the-wild/

129
votes

Just in case anyone finds it useful, I also put together a synchronous version.

var walk = function(dir) {
    var results = [];
    var list = fs.readdirSync(dir);
    list.forEach(function(file) {
        file = dir + '/' + file;
        var stat = fs.statSync(file);
        if (stat && stat.isDirectory()) { 
            /* Recurse into a subdirectory */
            results = results.concat(walk(file));
        } else { 
            /* Is a file */
            results.push(file);
        }
    });
    return results;
}

Tip: To use less resources when filtering. Filter within this function itself. E.g. Replace results.push(file); with below code. Adjust as required:

    file_type = file.split(".").pop();
    file_name = file.split(/(\\|\/)/g).pop();
    if (file_type == "json") results.push(file);
88
votes

A. Have a look at the file module. It has a function called walk:

file.walk(start, callback)

Navigates a file tree, calling callback for each directory, passing in (null, dirPath, dirs, files).

This may be for you! And yes, it is async. However, I think you would have to aggregate the full path's yourself, if you needed them.

B. An alternative, and even one of my favourites: use the unix find for that. Why do something again, that has already been programmed? Maybe not exactly what you need, but still worth checking out:

var execFile = require('child_process').execFile;
execFile('find', [ 'somepath/' ], function(err, stdout, stderr) {
  var file_list = stdout.split('\n');
  /* now you've got a list with full path file names */
});

Find has a nice build-in caching mechanism that makes subsequent searches very fast, as long as only few folder have changed.

43
votes

Another nice npm package is glob.

npm install glob

It is very powerful and should cover all your recursing needs.

Edit:

I actually wasn't perfectly happy with glob, so I created readdirp.

I'm very confident that its API makes finding files and directories recursively and applying specific filters very easy.

Read through its documentation to get a better idea of what it does and install via:

npm install readdirp

42
votes

I recommend using node-glob to accomplish that task.

var glob = require( 'glob' );  

glob( 'dirname/**/*.js', function( err, files ) {
  console.log( files );
});
15
votes

If you want to use an npm package, wrench is pretty good.

var wrench = require("wrench");

var files = wrench.readdirSyncRecursive("directory");

wrench.readdirRecursive("directory", function (error, files) {
    // live your dreams
});

EDIT (2018):
Anyone reading through in recent time: The author deprecated this package in 2015:

wrench.js is deprecated, and hasn't been updated in quite some time. I heavily recommend using fs-extra to do any extra filesystem operations.

10
votes

With Recursion

var fs = require('fs')
var path = process.cwd()
var files = []

var getFiles = function(path, files){
    fs.readdirSync(path).forEach(function(file){
        var subpath = path + '/' + file;
        if(fs.lstatSync(subpath).isDirectory()){
            getFiles(subpath, files);
        } else {
            files.push(path + '/' + file);
        }
    });     
}

Calling

getFiles(path, files)
console.log(files) // will log all files in directory
9
votes

I loved the answer from chjj above and would not have been able to create my version of the parallel loop without that start.

var fs = require("fs");

var tree = function(dir, done) {
  var results = {
        "path": dir
        ,"children": []
      };
  fs.readdir(dir, function(err, list) {
    if (err) { return done(err); }
    var pending = list.length;
    if (!pending) { return done(null, results); }
    list.forEach(function(file) {
      fs.stat(dir + '/' + file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          tree(dir + '/' + file, function(err, res) {
            results.children.push(res);
            if (!--pending){ done(null, results); }
          });
        } else {
          results.children.push({"path": dir + "/" + file});
          if (!--pending) { done(null, results); }
        }
      });
    });
  });
};

module.exports = tree;

I created a Gist as well. Comments welcome. I am still starting out in the NodeJS realm so that is one way I hope to learn more.

8
votes

Use node-dir to produce exactly the output you like

var dir = require('node-dir');

dir.files(__dirname, function(err, files) {
  if (err) throw err;
  console.log(files);
  //we have an array of files now, so now we can iterate that array
  files.forEach(function(path) {
    action(null, path);
  })
});
7
votes

Async

const fs = require('fs')
const path = require('path')

const readdir = (p, done, a = [], i = 0) => fs.readdir(p, (e, d = []) =>
  d.map(f => readdir(a[a.push(path.join(p, f)) - 1], () =>
    ++i == d.length && done(a), a)).length || done(a))

readdir(__dirname, console.log)

Sync

const fs = require('fs')
const path = require('path')

const readdirSync = (p, a = []) => {
  if (fs.statSync(p).isDirectory())
    fs.readdirSync(p).map(f => readdirSync(a[a.push(path.join(p, f)) - 1], a))
  return a
}

console.log(readdirSync(__dirname))

Async readable

function readdir (currentPath, done, allFiles = [], i = 0) {
  fs.readdir(currentPath, function (e, directoryFiles = []) {
    if (!directoryFiles.length)
      return done(allFiles)
    directoryFiles.map(function (file) {
      var joinedPath = path.join(currentPath, file)
      allFiles.push(joinedPath)
      readdir(joinedPath, function () {
        i = i + 1
        if (i == directoryFiles.length)
          done(allFiles)}
      , allFiles)
    })
  })
}

readdir(__dirname, console.log)

Note: both versions will follow symlinks (same as the original fs.readdir)

4
votes

I've coded this recently, and thought it would make sense to share this here. The code makes use of the async library.

var fs = require('fs');
var async = require('async');

var scan = function(dir, suffix, callback) {
  fs.readdir(dir, function(err, files) {
    var returnFiles = [];
    async.each(files, function(file, next) {
      var filePath = dir + '/' + file;
      fs.stat(filePath, function(err, stat) {
        if (err) {
          return next(err);
        }
        if (stat.isDirectory()) {
          scan(filePath, suffix, function(err, results) {
            if (err) {
              return next(err);
            }
            returnFiles = returnFiles.concat(results);
            next();
          })
        }
        else if (stat.isFile()) {
          if (file.indexOf(suffix, file.length - suffix.length) !== -1) {
            returnFiles.push(filePath);
          }
          next();
        }
      });
    }, function(err) {
      callback(err, returnFiles);
    });
  });
};

You can use it like this:

scan('/some/dir', '.ext', function(err, files) {
  // Do something with files that ends in '.ext'.
  console.log(files);
});
4
votes

A library called Filehound is another option. It will recursively search a given directory (working directory by default). It supports various filters, callbacks, promises and sync searches.

For example, search the current working directory for all files (using callbacks):

const Filehound = require('filehound');

Filehound.create()
.find((err, files) => {
    if (err) {
        return console.error(`error: ${err}`);
    }
    console.log(files); // array of files
});

Or promises and specifying a specific directory:

const Filehound = require('filehound');

Filehound.create()
.paths("/tmp")
.find()
.each(console.log);

Consult the docs for further use cases and examples of usage: https://github.com/nspragg/filehound

Disclaimer: I'm the author.

4
votes

Using async/await, this should work:

const FS = require('fs');
const readDir = promisify(FS.readdir);
const fileStat = promisify(FS.stat);

async function getFiles(dir) {
    let files = await readDir(dir);

    let result = files.map(file => {
        let path = Path.join(dir,file);
        return fileStat(path).then(stat => stat.isDirectory() ? getFiles(path) : path);
    });

    return flatten(await Promise.all(result));
}

function flatten(arr) {
    return Array.prototype.concat(...arr);
}

You can use bluebird.Promisify or this:

/**
 * Returns a function that will wrap the given `nodeFunction`. Instead of taking a callback, the returned function will return a promise whose fate is decided by the callback behavior of the given node function. The node function should conform to node.js convention of accepting a callback as last argument and calling that callback with error as the first argument and success value on the second argument.
 *
 * @param {Function} nodeFunction
 * @returns {Function}
 */
module.exports = function promisify(nodeFunction) {
    return function(...args) {
        return new Promise((resolve, reject) => {
            nodeFunction.call(this, ...args, (err, data) => {
                if(err) {
                    reject(err);
                } else {
                    resolve(data);
                }
            })
        });
    };
};

Node 8+ has Promisify built-in

See my other answer for a generator approach that can give results even faster.

3
votes

Check out the final-fs library. It provides a readdirRecursive function:

ffs.readdirRecursive(dirPath, true, 'my/initial/path')
    .then(function (files) {
        // in the `files` variable you've got all the files
    })
    .otherwise(function (err) {
        // something went wrong
    });
3
votes

qwtel's answer variant, in TypeScript

import { resolve } from 'path';
import { readdir } from 'fs/promises';

async function* getFiles(dir: string): AsyncGenerator<string> {
    const entries = await readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
        const res = resolve(dir, entry.name);
        if (entry.isDirectory()) {
            yield* getFiles(res);
        } else {
            yield res;
        }
    }
}
2
votes

Standalone promise implementation

I am using the when.js promise library in this example.

var fs = require('fs')
, path = require('path')
, when = require('when')
, nodefn = require('when/node/function');

function walk (directory, includeDir) {
    var results = [];
    return when.map(nodefn.call(fs.readdir, directory), function(file) {
        file = path.join(directory, file);
        return nodefn.call(fs.stat, file).then(function(stat) {
            if (stat.isFile()) { return results.push(file); }
            if (includeDir) { results.push(file + path.sep); }
            return walk(file, includeDir).then(function(filesInDir) {
                results = results.concat(filesInDir);
            });
        });
    }).then(function() {
        return results;
    });
};

walk(__dirname).then(function(files) {
    console.log(files);
}).otherwise(function(error) {
    console.error(error.stack || error);
});

I've included an optional parameter includeDir which will include directories in the file listing if set to true.

2
votes

klaw and klaw-sync are worth considering for this sort of thing. These were part of node-fs-extra.

2
votes

Simple, Async Promise Based


const fs = require('fs/promises');
const getDirRecursive = async (dir) => {
    try {
        const items = await fs.readdir(dir);
        let files = [];
        for (const item of items) {
            if ((await fs.lstat(`${dir}/${item}`)).isDirectory()) files = [...files, ...(await getDirRecursive(`${dir}/${item}`))];
            else files.push({file: item, path: `${dir}/${item}`, parents: dir.split("/")});
        }
        return files;
    } catch (e) {
        return e
    }
};

Usage: await getDirRecursive("./public");

1
votes

Here's yet another implementation. None of the above solutions have any limiters, and so if your directory structure is large, they're all going to thrash and eventually run out of resources.

var async = require('async');
var fs = require('fs');
var resolve = require('path').resolve;

var scan = function(path, concurrency, callback) {
    var list = [];

    var walker = async.queue(function(path, callback) {
        fs.stat(path, function(err, stats) {
            if (err) {
                return callback(err);
            } else {
                if (stats.isDirectory()) {
                    fs.readdir(path, function(err, files) {
                        if (err) {
                            callback(err);
                        } else {
                            for (var i = 0; i < files.length; i++) {
                                walker.push(resolve(path, files[i]));
                            }
                            callback();
                        }
                    });
                } else {
                    list.push(path);
                    callback();
                }
            }
        });
    }, concurrency);

    walker.push(path);

    walker.drain = function() {
        callback(list);
    }
};

Using a concurrency of 50 works pretty well, and is almost as fast as simpler implementations for small directory structures.

1
votes

The recursive-readdir module has this functionality.

1
votes

I modified Trevor Senior's Promise based answer to work with Bluebird

var fs = require('fs'),
    path = require('path'),
    Promise = require('bluebird');

var readdirAsync = Promise.promisify(fs.readdir);
var statAsync = Promise.promisify(fs.stat);
function walkFiles (directory) {
    var results = [];
    return readdirAsync(directory).map(function(file) {
        file = path.join(directory, file);
        return statAsync(file).then(function(stat) {
            if (stat.isFile()) {
                return results.push(file);
            }
            return walkFiles(file).then(function(filesInDir) {
                results = results.concat(filesInDir);
            });
        });
    }).then(function() {
        return results;
    });
}

//use
walkDir(__dirname).then(function(files) {
    console.log(files);
}).catch(function(e) {
    console.error(e); {
});
1
votes

For fun, here is a flow based version that works with highland.js streams library. It was co-authored by Victor Vu.

###
  directory >---m------> dirFilesStream >---------o----> out
                |                                 |
                |                                 |
                +--------< returnPipe <-----------+

  legend: (m)erge  (o)bserve

 + directory         has the initial file
 + dirListStream     does a directory listing
 + out               prints out the full path of the file
 + returnPipe        runs stat and filters on directories

###

_ = require('highland')
fs = require('fs')
fsPath = require('path')

directory = _(['someDirectory'])
mergePoint = _()
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
  _.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
    fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe
returnPipe = dirFilesStream.observe().flatFilter((path) ->
  _.wrapCallback(fs.stat)(path).map (v) ->
    v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure.  This will print files as they are discovered
out.each H.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))
1
votes

Using Promises (Q) to solve this in a Functional style:

var fs = require('fs'),
    fsPath = require('path'),
    Q = require('q');

var walk = function (dir) {
  return Q.ninvoke(fs, 'readdir', dir).then(function (files) {

    return Q.all(files.map(function (file) {

      file = fsPath.join(dir, file);
      return Q.ninvoke(fs, 'lstat', file).then(function (stat) {

        if (stat.isDirectory()) {
          return walk(file);
        } else {
          return [file];
        }
      });
    }));
  }).then(function (files) {
    return files.reduce(function (pre, cur) {
      return pre.concat(cur);
    });
  });
};

It returns a promise of an array, so you can use it as:

walk('/home/mypath').then(function (files) { console.log(files); });
1
votes

I must add the Promise-based sander library to the list.

 var sander = require('sander');
 sander.lsr(directory).then( filenames => { console.log(filenames) } );
1
votes

Using bluebird promise.coroutine:

let promise = require('bluebird'),
    PC = promise.coroutine,
    fs = promise.promisifyAll(require('fs'));
let getFiles = PC(function*(dir){
    let files = [];
    let contents = yield fs.readdirAsync(dir);
    for (let i = 0, l = contents.length; i < l; i ++) {
        //to remove dot(hidden) files on MAC
        if (/^\..*/.test(contents[i])) contents.splice(i, 1);
    }
    for (let i = 0, l = contents.length; i < l; i ++) {
        let content = path.resolve(dir, contents[i]);
        let contentStat = yield fs.statAsync(content);
        if (contentStat && contentStat.isDirectory()) {
            let subFiles = yield getFiles(content);
            files = files.concat(subFiles);
        } else {
            files.push(content);
        }
    }
    return files;
});
//how to use
//easy error handling in one place
getFiles(your_dir).then(console.log).catch(err => console.log(err));
1
votes

Whoever wants a synchronous alternative to the accepted answer (I know I did):

var fs = require('fs');
var path = require('path');
var walk = function(dir) {
    let results = [], err = null, list;
    try {
        list = fs.readdirSync(dir)
    } catch(e) {
        err = e.toString();
    }
    if (err) return err;
    var i = 0;
    return (function next() {
        var file = list[i++];

        if(!file) return results;
        file = path.resolve(dir, file);
        let stat = fs.statSync(file);
        if (stat && stat.isDirectory()) {
          let res = walk(file);
          results = results.concat(res);
          return next();
        } else {
          results.push(file);
           return next();
        }

    })();

};

console.log(
    walk("./")
)
1
votes

There is a new module called cup-readdir that recursively searches directories very fast. It uses asynchronous promises and outperforms many popular modules when dealing with deep directory structures.

It can return all files in an array and sort them by their properties, but lacks features like file filtering and entering symlinked directories. This could be useful for large projects where you simply want to get every file from a directory. Here is a link to their project homepage.

1
votes

Here is a simple synchronous recursive solution

const fs = require('fs')

const getFiles = path => {
    const files = []
    for (const file of fs.readdirSync(path)) {
        const fullPath = path + '/' + file
        if(fs.lstatSync(fullPath).isDirectory())
            getFiles(fullPath).forEach(x => files.push(file + '/' + x))
        else files.push(file)
    }
    return files
}

Usage:

const files = getFiles(process.cwd())

console.log(files)

You could write it asynchronously, but there is no need. Just make sure that the input directory exists and is accessible.

0
votes

Because everyone should write his own, I made one.

walk(dir, cb, endCb) cb(file) endCb(err | null)

DIRTY

module.exports = walk;

function walk(dir, cb, endCb) {
  var fs = require('fs');
  var path = require('path');

  fs.readdir(dir, function(err, files) {
    if (err) {
      return endCb(err);
    }

    var pending = files.length;
    if (pending === 0) {
      endCb(null);
    }
    files.forEach(function(file) {
      fs.stat(path.join(dir, file), function(err, stats) {
        if (err) {
          return endCb(err)
        }

        if (stats.isDirectory()) {
          walk(path.join(dir, file), cb, function() {
            pending--;
            if (pending === 0) {
              endCb(null);
            }
          });
        } else {
          cb(path.join(dir, file));
          pending--;
          if (pending === 0) {
            endCb(null);
          }
        }
      })
    });

  });
}