First, let me define what is short-cut fusion for those of you who don't know. Consider the following array transformation in JavaScript:
var a = [1,2,3,4,5].map(square).map(increment);
console.log(a);
function square(x) {
return x * x;
}
function increment(x) {
return x + 1;
}
Here we have an array, [1,2,3,4,5]
, whose elements are first squared, [1,4,9,16,25]
, and then incremented [2,5,10,17,26]
. Hence, although we don't need the intermediate array [1,4,9,16,25]
, we still create it.
Short-cut fusion is an optimization technique which can get rid of intermediate data structures by merging some functions calls into one. For example, short-cut fusion can be applied to the above code to produce:
var a = [1,2,3,4,5].map(compose(square, increment));
console.log(a);
function square(x) {
return x * x;
}
function increment(x) {
return x + 1;
}
function compose(g, f) {
return function (x) {
return f(g(x));
};
}
As you can see, the two separate map
calls have been fused into a single map
call by composing the square
and increment
functions. Hence the intermediate array is not created.
Now, I understand that libraries like Immutable.js and Lazy.js emulate lazy evaluation in JavaScript. Lazy evaluation means that results are only computed when required.
For example, consider the above code. Although we square
and increment
each element of the array, yet we may not need all the results.
Suppose we only want the first 3 results. Using Immutable.js or Lazy.js we can get the first 3 results, [2,5,10]
, without calculating the last 2 results, [17,26]
, because they are not needed.
However, lazy evaluation just delays the calculation of results until required. It does not remove intermediate data structures by fusing functions.
To make this point clear, consider the following code which emulates lazy evaluation:
var List = defclass({
constructor: function (head, tail) {
if (typeof head !== "function" || head.length > 0)
Object.defineProperty(this, "head", { value: head });
else Object.defineProperty(this, "head", { get: head });
if (typeof tail !== "function" || tail.length > 0)
Object.defineProperty(this, "tail", { value: tail });
else Object.defineProperty(this, "tail", { get: tail });
},
map: function (f) {
var l = this;
if (l === nil) return nil;
return cons(function () {
return f(l.head);
}, function () {
return l.tail.map(f);
});
},
take: function (n) {
var l = this;
if (l === nil || n === 0) return nil;
return cons(function () {
return l.head;
}, function () {
return l.tail.take(n - 1);
});
},
mapSeq: function (f) {
var l = this;
if (l === nil) return nil;
return cons(f(l.head), l.tail.mapSeq(f));
}
});
var nil = Object.create(List.prototype);
list([1,2,3,4,5])
.map(trace(square))
.map(trace(increment))
.take(3)
.mapSeq(log);
function cons(head, tail) {
return new List(head, tail);
}
function list(a) {
return toList(a, a.length, 0);
}
function toList(a, length, i) {
if (i >= length) return nil;
return cons(a[i], function () {
return toList(a, length, i + 1);
});
}
function square(x) {
return x * x;
}
function increment(x) {
return x + 1;
}
function log(a) {
console.log(a);
}
function trace(f) {
return function () {
var result = f.apply(this, arguments);
console.log(f.name, JSON.stringify([...arguments]), result);
return result;
};
}
function defclass(prototype) {
var constructor = prototype.constructor;
constructor.prototype = prototype;
return constructor;
}
As you can see, the function calls are interleaved and only the first three elements of the array are processed, proving that the results are indeed computed lazily:
square [1] 1
increment [1] 2
2
square [2] 4
increment [4] 5
5
square [3] 9
increment [9] 10
10
If lazy evaluation is not used then the result would be:
square [1] 1
square [2] 4
square [3] 9
square [4] 16
square [5] 25
increment [1] 2
increment [4] 5
increment [9] 10
increment [16] 17
increment [25] 26
2
5
10
However, if you see the source code then each function list
, map
, take
and mapSeq
returns an intermediate List
data structure. No short-cut fusion is performed.
This brings me to my main question: do libraries like Immutable.js and Lazy.js perform short-cut fusion?
The reason I ask is because according to the documentation, they “apparently” do. However, I am skeptical. I have my doubts whether they actually perform short-cut fusion.
For example, this is taken from the README.md file of Immutable.js:
Immutable
also provides a lazySeq
, allowing efficient chaining of collection methods likemap
andfilter
without creating intermediate representations. Create someSeq
withRange
andRepeat
.
So the developers of Immutable.js claim that their Seq
data structure allows efficient chaining of collection methods like map
and filter
without creating intermediate representations (i.e. they perform short-cut fusion).
However, I don't see them doing so in their code anywhere. Perhaps I can't find it because they are using ES6 and my eyes aren't all too familiar with ES6 syntax.
Furthermore, in their documentation for Lazy Seq they mention:
Seq
describes a lazy operation, allowing them to efficiently chain use of all the Iterable methods (such asmap
andfilter
).Seq is immutable — Once a Seq is created, it cannot be changed, appended to, rearranged or otherwise modified. Instead, any mutative method called on a Seq will return a new Seq.
Seq is lazy — Seq does as little work as necessary to respond to any method call.
So it is established that Seq
is indeed lazy. However, there are no examples to show that intermediate representations are indeed not created (which they claim to be doing).
Moving on to Lazy.js we have the same situation. Thankfully, Daniel Tao wrote a blog post on how Lazy.js works, in which he mentions that at its heart Lazy.js simply does function composition. He gives the following example:
Lazy.range(1, 1000)
.map(square)
.filter(multipleOf3)
.take(10)
.each(log);
function square(x) {
return x * x;
}
function multipleOf3(x) {
return x % 3 === 0;
}
function log(a) {
console.log(a);
}
<script src="https://rawgit.com/dtao/lazy.js/master/lazy.min.js"></script>
Here the map
, filter
and take
functions produce intermediate MappedSequence
, FilteredSequence
and TakeSequence
objects. These Sequence
objects are essentially iterators, which eliminate the need of intermediate arrays.
However, from what I understand, there is still no short-cut fusion taking place. The intermediate array structures are simply replaced with intermediate Sequence
structures which are not fused.
I could be wrong, but I believe that expressions like Lazy(array).map(f).map(g)
produce two separate MappedSequence
objects in which the first MappedSequence
object feeds its values to the second one, instead of the second one replacing the first one by doing the job of both (via function composition).
TLDR: Do Immutable.js and Lazy.js indeed perform short-cut fusion? As far as I know they get rid of intermediate arrays by emulating lazy evaluation via sequence objects (i.e. iterators). However, I believe that these iterators are chained: one iterator feeding its values lazily to the next. They are not merged into a single iterator. Hence they do not “eliminate intermediate representations“. They only transform arrays into constant space sequence objects.
transducers
; originally from Clojure but there is JS library as well (github.com/jlongster/transducers.js). You can easily compose functions and use it on arrays, objects, and even on immutable-js and get the high performance guarantees because transducers create no intermediate collections. Check out this blog post as well (jlongster.com/…). – Brian Park