4
votes

I have a crossfilter with the following data structure being inputted.

project | subproject | cost
// Sample rows, one per subproject: [project, subproject, cost].
// Declared with `var` so the snippet does not create an implicit global
// (which is a ReferenceError in strict mode / ES modules).
var data = [
["PrA", "SubPr1", 100],
["PrA", "SubPr2", 150],
["PrA", "SubPr3", 100],
["PrB", "SubPr4", 300],
["PrB", "SubPr5", 500],
["PrC", "SubPr6", 450]];

I can create a barchart that has the summed cost per project:

// Sum the budget per project.
// The accessors read `d.project` and `d.budget`, so each row is assumed to be
// an object carrying those two properties — TODO confirm against how `data`
// is actually loaded.
var ndx = crossfilter(data);
var projDim = ndx.dimension(function(d){return d.project;});
// The group must hang off the project dimension so that each bin is one project.
var projGroup = projDim.group().reduceSum(function(d){return d.budget;});

What I want to do is create a dc.js histogram by project cost...so {450: 2, 300: 1}, etc. As far as I can tell, crossfilter can have only attributes of each row be input for a dimension. Is there a way around this?

2
benlaird, if you found an answer, would you mind sharing it? – user714403
Histograms in dc.js require a second grouping. My current solution is to not use dc.js for the histogram. Instead, I use crossfilter to group and then d3 to create a histogram (which does not require the values to be grouped). – user714403

2 Answers

4
votes

Accepting the challenge!

It is true, crossfilter does not support this kind of double-reduction, but if you are willing to accept a slight loss of efficiency, you can create "fake dimensions" and "fake groups" with the desired behavior. Luckily, dc.js doesn't use very much of the crossfilter API, so you don't have to implement too many methods.

The first part of the trick is to duplicate the dimension and group so that the new dimension and old dimension will each observe filtering on the other.

The second part is to create the fake groups and dimensions, which walk the bins of the copied group and rebin and refilter based on the values instead of the keys.

A start of a general solution is below. For some charts it is also necessary to implement group.top(), and it is usually okay to just forward that to group.all().

/**
 * Build a stand-in "dimension" that filters on the reduced *values* of
 * `group` instead of on its keys.
 *
 * @param dim   the real dimension; every filter is ultimately applied to it
 * @param group the group whose bins ({key, value}) are inspected
 * @returns an object exposing `filter` and `filterFunction`
 */
function values_dimension(dim, group) {
    var fake = {};

    // Only "clear the filter" is supported; any other argument is rejected.
    fake.filter = function(v) {
        if(v === null)
            return dim.filter(null);
        throw new Error("don't know how to do this!");
    };

    // Translate a predicate over values into a predicate over keys:
    // collect the keys of every bin whose value passes `f`, then let the
    // real dimension keep exactly those keys.
    fake.filterFunction = function(f) {
        var matchingKeys = group.all()
            .filter(function(bin) {
                return f(bin.value);
            })
            .map(function(bin) {
                return bin.key;
            });
        dim.filterFunction(function(key) {
            return matchingKeys.indexOf(key) !== -1;
        });
        return this;
    };

    return fake;
}

/**
 * Build a stand-in "group" whose bins count how many bins of `group`
 * share each reduced value, i.e. a histogram of the original group's values.
 *
 * @param group the group whose `all()` bins ({key, value}) are re-binned
 * @returns an object exposing `all()`, which returns an array of
 *          {key: <numeric value>, value: <number of bins with that value>}
 *          sorted by ascending key
 */
function values_group(group) {
    return {
        all: function() {
            // Count in a keyed object rather than a sparse array: array
            // indices only cover non-negative integers, so fractional or
            // negative values would otherwise be silently lost.
            var counts = Object.create(null);
            group.all().forEach(function(kv) {
                var v = Number(kv.value);
                // Empty bins (value 0) are left out of the histogram, and a
                // value that is not a number cannot be placed on a numeric axis.
                if(v === 0 || isNaN(v))
                    return;
                counts[v] = (counts[v] || 0) + 1;
            });
            return Object.keys(counts)
                .map(function(k) {
                    // Object keys are strings; turn them back into numbers.
                    return {key: Number(k), value: counts[k]};
                })
                .sort(function(a, b) {
                    return a.key - b.key;
                });
        }
    };
}

// A second dimension/group pair on the same field, so that this pair and the
// original one each observe filtering applied to the other.
var projDim2 = ndx.dimension(function(d) {
    return d.project;
});
var projGroup2 = projDim2.group().reduceSum(function(d) {
    return d.budget;
});

// Wrap the duplicate pair in the value-based stand-ins.
var countBudgetDim = values_dimension(projDim2, projGroup2);
var countBudgetGroup = values_group(projGroup2);

jsfiddle here: http://jsfiddle.net/gordonwoodhull/55zf7L1L/

1
votes

[image] JSFiddle Link

Denormalize + Map-reduce. Note that the data already includes the cost per project as the 4th column (and this can be pre-calculated easily). It's a hack, but hopefully an easy one that gets dc.js and crossfilter to work without too much change.

// Denormalized rows: [project, subproject, budget, cost], where the 4th
// column repeats the same per-project figure on every row of that project.
var data = [
    ["PrA", "SubPr1", 100, 450],
    ["PrA", "SubPr2", 150, 450],
    ["PrA", "SubPr3", 200, 450],
    ["PrB", "SubPr4", 300, 800],
    ["PrB", "SubPr5", 500, 800],
    ["PrC", "SubPr6", 450, 450]
];

// Turn each positional row into a record with named fields.
var newdata = [];
data.forEach(function (row) {
    newdata.push({
        project: row[0],
        subproject: row[1],
        budget: row[2],
        cost: row[3]
    });
});

var ndx = crossfilter(newdata),
    // Dimension keyed by the per-project cost column.
    costDim = ndx.dimension(function (d) {
        return d.cost;
    }),

    // visitedProj[cost][project] = number of rows of that project currently
    // counted in the bin for that cost. Keeping counts (instead of a one-way
    // "seen" flag) lets rows be removed and re-added as filters change.
    visitedProj = {},

    // Each bin's value is the number of distinct projects with that cost.
    costGroup = costDim.group().reduce(function (p, v) {
        var seen = visitedProj[v.cost] || (visitedProj[v.cost] = Object.create(null));
        seen[v.project] = (seen[v.project] || 0) + 1;
        // Only the first row of a project adds that project to the bin.
        return seen[v.project] === 1 ? p + 1 : p;
    }, function (p, v) {
        var seen = visitedProj[v.cost];
        // Nothing to undo if this row was never counted.
        if (!seen || !seen[v.project]) return p;
        seen[v.project] -= 1;
        // Only the last remaining row of a project takes it out of the bin.
        return seen[v.project] === 0 ? p - 1 : p;
    }, function () {
        return 0;
    });

// Row chart over the cost dimension and group, with labels switched on.
var costChart = dc.rowChart("#costChart");
costChart
    .renderLabel(true)
    .dimension(costDim)
    .group(costGroup);

// ticks() is applied to whatever xAxis() returns, so it is kept as its own
// statement instead of being the tail of the chart's setter chain.
costChart.xAxis().ticks(2);

dc.renderAll();

Map-Reduce can be very powerful and the API can be accessed from here. JSFiddle Link