Published Oct 05, 2019
[
 
]
Let’s say we want to write a module that finds the largest file within a directory. Using it would look like this:
// Example usage: report the largest file found directly inside a directory.
var findLargest = require('./findLargest')

findLargest('./path/to/dir', function (err, filename) {
  if (err) {
    console.error(err)
    return
  }
  console.log('largest file was:', filename)
})
Let’s break down the steps to accomplish this:
If an error occurs at any point, callback with that error instead. We also should never call the callback more than once.
var fs = require('fs')
var path = require('path')
module.exports = function (dir, cb) {
fs.readdir(dir, function (er, files) { // [1]
if (er) return cb(er)
var counter = files.length
var errored = false
var stats = []
files.forEach(function (file, index) {
fs.stat(path.join(dir,file), function (er, stat) { // [2]
if (errored) return
if (er) {
errored = true
return cb(er)
}
stats[index] = stat // [3]
if (--counter == 0) { // [4]
var largest = stats
.filter(function (stat) { return stat.isFile() }) // [5]
.reduce(function (prev, next) { // [6]
if (prev.size > next.size) return prev
return next
})
cb(null, files[stats.indexOf(largest)]) // [7]
}
})
})
})
}
This may be a perfectly fine approach to solving this problem. However, it’s tricky to manage the parallel operations and ensure we call the callback only once.
Our nested approach can be broken out into three modular units:
/**
 * Stat every path in `paths` in parallel.
 *
 * @param {string[]} paths - filesystem paths to stat
 * @param {function(Error, fs.Stats[]=)} cb - called exactly once with
 *   the first error encountered, or (null, stats) where stats[i]
 *   corresponds to paths[i].
 */
function getStats (paths, cb) {
  // Edge case: no paths — call back immediately instead of never.
  if (paths.length === 0) return cb(null, [])
  var counter = paths.length
  var errored = false
  var stats = []
  paths.forEach(function (path, index) {
    fs.stat(path, function (er, stat) {
      if (errored) return // guarantee the callback fires at most once
      if (er) {
        errored = true
        return cb(er)
      }
      stats[index] = stat // preserve input order despite parallel completion
      if (--counter === 0) cb(null, stats)
    })
  })
}
/**
 * Given parallel arrays of filenames and fs.Stats, return the name of
 * the largest regular file.
 *
 * @param {string[]} files - names, aligned index-for-index with `stats`
 * @param {fs.Stats[]} stats - stat results for each name
 * @returns {string|null} the largest regular file's name, or null when
 *   there are no regular files (e.g. only subdirectories).
 */
function getLargestFile (files, stats) {
  var fileStats = stats.filter(function (stat) { return stat.isFile() })
  // Edge case: no regular files — reduce on an empty array without an
  // initial value throws a TypeError.
  if (fileStats.length === 0) return null
  var largest = fileStats.reduce(function (prev, next) {
    return prev.size > next.size ? prev : next
  })
  return files[stats.indexOf(largest)]
}
var fs = require('fs')
var path = require('path')
module.exports = function (dir, cb) {
fs.readdir(dir, function (er, files) {
if (er) return cb(er)
var paths = files.map(function (file) { // [1]
return path.join(dir,file)
})
getStats(paths, function (er, stats) {
if (er) return cb(er)
var largestFile = getLargestFile(files, stats)
cb(null, largestFile)
})
})
A modular approach makes reusing and testing the methods easier. The main export is easier to reason about as well. However, we are still manually managing the parallel stat tasks.
var fs = require('fs')
var async = require('async')
var path = require('path')
module.exports = function (dir, cb) {
async.waterfall([ // [1]
function (next) {
fs.readdir(dir, next)
},
function (files, next) {
var paths =
files.map(function (file) { return path.join(dir,file) })
async.map(paths, fs.stat, function (er, stats) { // [2]
next(er, files, stats)
})
},
function (files, stats, next) {
var largest = stats
.filter(function (stat) { return stat.isFile() })
.reduce(function (prev, next) {
if (prev.size > next.size) return prev
return next
})
next(null, files[stats.indexOf(largest)])
}
], cb) // [3]
}
async.waterfall provides a series flow of execution where data from one operation can be passed to the next function in the series using the next callback.

async.map lets us run fs.stat over a set of paths in parallel and calls back with an array (with order maintained) of the results.

The async module guarantees only one callback will be fired. It also propagates errors and manages parallelism for us.
var fs = require('fs')
var path = require('path')
var Q = require('q')
var fs_readdir = Q.denodeify(fs.readdir) // [1]
var fs_stat = Q.denodeify(fs.stat)
module.exports = function (dir) {
return fs_readdir(dir)
.then(function (files) {
var promises = files.map(function (file) {
return fs_stat(path.join(dir,file))
})
return Q.all(promises).then(function (stats) { // [2]
return [files, stats] // [3]
})
})
.then(function (data) { // [4]
var files = data[0]
var stats = data[1]
var largest = stats
.filter(function (stat) { return stat.isFile() })
.reduce(function (prev, next) {
if (prev.size > next.size) return prev
return next
})
return files[stats.indexOf(largest)]
})
Q.all will run all the stat calls in parallel, and the result array order is maintained.

Using the module looks like this:

var findLargest = require('./findLargest')
// Consuming the promise-based module: handle the value with then(),
// and route any rejection from the chain into the error logger.
findLargest('./path/to/dir')
  .then(function (filename) {
    console.log('largest file was:', filename)
  })
  .catch(console.error)
var co = require('co')
var thunkify = require('thunkify')
var fs = require('fs')
var path = require('path')
var readdir = thunkify(fs.readdir)
var stat = thunkify(fs.stat)
module.exports = co(function* (dir) { // [2]
var files = yield readdir(dir) // [3]
var stats = yield files.map(function (file) { // [4]
return stat(path.join(dir,file))
})
var largest = stats
.filter(function (stat) { return stat.isFile() })
.reduce(function (prev, next) {
if (prev.size > next.size) return prev
return next
})
return files[stats.indexOf(largest)] // [5]
})
co takes a generator function which can be suspended at any time using the yield keyword.

co can also handle arrays as a set of parallel operations to perform. A result array, with order maintained, is assigned to stats.

Errors can be handled with an ordinary try/catch inside the generator:

try {
// A rejected/errored yield is thrown back into the generator, so
// ordinary try/catch handles async errors here.
var files = yield readdir(dir)
} catch (er) {
// NOTE(review): fragment from the article — in real code you would
// likely rethrow or call back rather than only logging.
console.error('something happened whilst reading the directory')
}
References: