2
votes

I'm attempting to continue building a baseball statistics dashboard and seem to have hit a wall once again.

I have an array of records representing statistics for a game and would like to produce a barchart with a single bar that represents the batting average for the selected date range in the first chart on the page.

I received help creating the first chart, which shows the cumulative hit total for the selected date range. The approach was to wrap the groupAll object with the regularize_groupAll function so that it responds to an .all() call rather than only to .value().

Approach 1:

Using the same approach, I configured the graph/group to sum each day's contribution to the total average. (In this example there are three games, and the hitter's average in each game is .500, but because the hit and at-bat totals differ, each game contributes a different amount to the cumulative average.) This is not what I want: the graph should show the actual average for the selected time period. If one game is selected I want the graph to show .500; the same for two games, and the same for all three, since the cumulative average for every possible date combination is .500.

// dc.js bar chart created for the "#avg-chart" selector; it is configured and rendered further down.
var avgChart = dc.barChart("#avg-chart");

/**
 * Builds a groupAll on `dim` that sums every record's weighted contribution
 * to the overall batting average.
 *
 * A record contributes (h / ab) * (ab / totalAbs): its own average weighted
 * by its share of the total at-bats.
 *
 * @param {number} totalAbs - Total at-bats used as the weighting denominator.
 * @param {Object} dim - Object exposing groupAll(), e.g. a crossfilter dimension.
 * @returns {Object} The result of dim.groupAll().reduceSum(...).
 */
function avg(totalAbs, dim) {
  return dim.groupAll().reduceSum(function(d) {
    // A record without a positive at-bat count would evaluate 0 / 0 and turn
    // the whole sum into NaN, so it contributes nothing instead.
    if (!(d.ab > 0)) {
      return 0;
    }
    return (d.h / d.ab) * (d.ab / totalAbs);
  });
}

// Total at-bats over abDim's groupAll, evaluated once when this script runs.
var totalAbs = abDim
  .groupAll()
  .reduceSum(function(record) {
    return record.ab;
  })
  .value();
// groupAll that sums each record's weighted contribution to the average.
var totalAvg = avg(totalAbs, abDim);

// Pass the groupAll through regularize_groupAll before handing it to the chart.
var regTotalAvg = regularize_groupAll(totalAvg);

avgChart.width(200);
avgChart.height(HEIGHT + 30);
// The x axis has the single ordinal category "Avg".
avgChart.x(d3.scale.ordinal().domain(["Avg"]));
avgChart.xUnits(dc.units.ordinal);
// The upper y bound is the group's value at configuration time.
avgChart.y(d3.scale.linear().domain([0, totalAvg.value()]));
avgChart.yAxisLabel("");
avgChart.centerBar(true);
avgChart.dimension(abDim);
avgChart.brushOn(false);
avgChart.alwaysUseRounding(true);
avgChart.group(regTotalAvg);

avgChart.render();

Approach 2:

After looking over Reductio's documentation I thought I might be able to use the .groupAll(groupingFunction). The way to partially solve the problem is to use the function to have the games from all previous dates included in the current date's calculations. I am able to get the correct number of hits using the .sum(d.h) function but I currently can't modify the count to be the correct number (d.ab).

// groupAll over the date dimension; the reductio reducer is applied to it below.
// Declared with var so the snippet does not rely on implicit globals.
var groupAll = dateDim.groupAll();
// Game dates. JavaScript months are zero-based, so 3 means April.
var dateArray = [new Date(2016, 3, 4), new Date(2016, 3, 5), new Date(2016, 3, 6)];

var reducer = reductio()
  .groupAll(function(record) {
    // Assign the record to its own date and to every later date in dateArray.
    // Presumably record.index is the record's position in dateArray (confirm against the data).
    var datesToInclude = [];
    // The counter is local so the callback cannot clobber an outer `i`.
    for (var i = record.index; i < dateArray.length; i++) {
      datesToInclude.push(dateArray[i]);
    }
    return datesToInclude;
  })
  .count(true)
  .sum(function(d) { return d.h; });

reducer(groupAll);
console.log(groupAll.value());

Approach 3:

Approach 3 was to try to create custom reduce functions and feed them to a groupAll().reduce(reduceAdd, reduceRemove, reduceInitial) function. This attempt at first did not produce a graph. After adding a .valueAccessor( function(p) {return p.value.count > 0 ? p.value.total / p.value.count : 0 }); call to the end of the function the total average was drawn but after setting breakpoints I discovered the reduceRemove function was never called after moving the brush to filter dates out.

// dc.js bar chart created for the "#avg-chart" selector.
// The statement is terminated explicitly rather than relying on automatic semicolon insertion.
var avgChart = dc.barChart("#avg-chart");

/**
 * Reduce "add" step: folds one record into the running totals.
 *
 * @param {Object} p - Accumulator with `count` and `total`; mutated in place.
 * @param {Object} v - Record entering the group; `ab` and `h` are read.
 * @returns {Object} The same accumulator object.
 */
function reduceAdd(p, v) {
  // `count` tracks at-bats and `total` tracks hits.
  p.count = p.count + v.ab;
  p.total = p.total + v.h;
  return p;
}

/**
 * Reduce "remove" step: subtracts one record from the running totals.
 * It is the exact inverse of reduceAdd.
 *
 * @param {Object} p - Accumulator with `count` and `total`; mutated in place.
 * @param {Object} v - Record leaving the group; `ab` and `h` are read.
 * @returns {Object} The same accumulator object.
 */
function reduceRemove(p, v) {
  p.count -= v.ab;
  // This must target `total`; a misspelled property would leave the hit sum
  // untouched and add a stray NaN field to the accumulator.
  p.total -= v.h;
  return p;
}

/**
 * Reduce "initial" step: creates a fresh, empty accumulator.
 *
 * @returns {Object} A new object with `count` and `total` both set to 0.
 */
function reduceInitial() {
  var accumulator = {};
  accumulator.count = 0;
  accumulator.total = 0;
  return accumulator;
}

// groupAll over dateDim that accumulates at-bats (count) and hits (total).
// NOTE(review): the group is built on dateDim, so it does not observe filters
// applied to dateDim itself. Build it on another dimension if the bar must
// react to the date brush.
var allAvg = dateDim.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial);
// Current reduced value ({count, total}); used below for logging only.
var totalAvg = allAvg.value();
console.log("Total avg total hit count" + totalAvg.total);
console.log("Total avg count ab count" + totalAvg.count);
var regTotalAvg = regularize_groupAll(allAvg);

avgChart
 .width(200)
 .height(HEIGHT + 30)
 .x(d3.scale.ordinal().domain(["Avg"]))
 .xUnits(dc.units.ordinal)
 // A batting average always lies in [0, 1]. A fixed domain avoids a NaN upper
 // bound when there are no at-bats, and keeps the bar inside the axis when a
 // selection's average is higher than the overall average.
 .y(d3.scale.linear().domain([0, 1]))
 .yAxisLabel("")
 .centerBar(true)
 .dimension(dateDim)
 .brushOn(false)
 .alwaysUseRounding(true)
 .group(regTotalAvg)
 // The average is derived at display time from the accumulated components.
 .valueAccessor(function(p) {
  return p.value.count > 0 ? p.value.total / p.value.count : 0;
 });

 avgChart.render();

JSFiddle: https://jsfiddle.net/schins02/acchgsfL/

Any help would be greatly appreciated and I hope this might help me get over the hump and be able to solve problems of this nature on my own.

1

1 Answer

4
votes

When you are computing an average with Crossfilter, you should use Crossfilter to compute the components incrementally and then calculate the average itself when it is time to display the data. Using Reductio, you would do this by creating a dummy dimension that always has one value, then creating a group based on that dimension that calculates the necessary components. In your example:

// Dummy dimension: every record maps to the same key (true), so the group
// built on it has a single bin.
var avgDim = playerData.dimension(function(d) { return true; });
var avgGroup = avgDim.group();
// Keep the component sums up to date incrementally; the average itself is
// only computed when the value is displayed.
var reducer = reductio();
reducer.value("ab").sum("ab");
reducer.value("h").sum("h");
reducer.value("bb").sum("bb");
reducer(avgGroup);

avgChart
  .width(200)
  .height(HEIGHT + 30)
  .x(d3.scale.ordinal().domain(["Avg"]))
  .xUnits(dc.units.ordinal)
  .y(d3.scale.linear().domain([0, 1]))
  .yAxisLabel("")
  .centerBar(true)
  .dimension(avgDim)
  .brushOn(false)
  .alwaysUseRounding(true)
  .group(avgGroup)
  .valueAccessor(function(p) {
    // At-bats net of walks. This assumes the "ab" field includes walks; drop
    // the subtraction if it does not.
    var atBats = p.value.ab.sum - p.value.bb.sum;
    // When no at-bats remain (e.g. every record is filtered out), report 0
    // instead of dividing by zero and handing NaN to the chart.
    return atBats > 0 ? p.value.h.sum / atBats : 0;
  });

Here's the Fiddle: https://jsfiddle.net/esjewett/qmtL6221/1/

Note that I'm not sure if your at-bat count already removes walks or not. I've assumed that it includes walks and removed them because walks are not counted as at-bats in a batting average calculation, but don't do that if your at-bat count doesn't include walks.

P.S. I looked at this a bit more, and your Approach 3 would work as well. The problem, as you note, is that the filter doesn't get applied to your group. This is because you defined your group on dateDim, the same dimension you are filtering on. Crossfilter does not apply a dimension's filter to groups created from that same dimension.