0
votes

We have multiple nested fields which need to be summed and then graphed almost as if it were a value of the parent document (using scripted fields is not an ideal solution for us).

Given the example index mapping:

{
  "mapping": {
    "_doc": {
      "properties": {
        "build_name": { "type": "keyword" },
        "start_ms": { "type": "date" },
        "projects": {
          "type": "nested",
          "properties": {
            "project_duration_ms": { type": "long" },
            "project_name": { "type": "keyword" }
          }
        }
      }
    }
  }
}

Example doc._source:

{
  "build_name": "example_build_1",
  "start_ms": "1611252094540",
  "projects": [
    { "project_duration_ms": "19381", project_name": "example_project_1" },
    { "project_duration_ms": "2081", "project_name": "example_project_2" }
  ]
},
{
  "build_name": "example_build_2",
  "start_ms": "1611252097638",
  "projects": [
    { "project_duration_ms": "21546", project_name": "example_project_1" },
    { "project_duration_ms": "2354", "project_name": "example_project_2" }
  ]
}

It would be ideal to get a aggregation something like:

....
  "aggregations" : {
    "builds" : {
      "total_durations" : {
        "buckets" : [
          {
            "key": "example_build_1",
            "start_ms": "1611252094540",
            "total_duration": "21462"
          },
          {
            "key": "example_build_2",
            "start_ms": "1611252097638",
            "total_duration": "23900"
          }
        }
      }
    }
  }
}
1

1 Answers

1
votes

No scripted fields necessary. This nested sum aggregation should do the trick:

{
  "size": 0,
  "aggs": {
    "builds": {
      "terms": {
        "field": "build_name"
      },
      "aggs": {
        "total_durations_parent": {
          "nested": {
            "path": "projects"
          },
          "aggs": {
            "total_durations": {
              "sum": {
                "field": "projects.project_duration_ms"
              }
            }
          }
        }
      }
    }
  }
}

Your use case is a great candidate for employing the copy_to parameter which'll put the build durations into one top-level list of longs so that the nested query won't be required when we're summing them up.

Adjust the mapping like so:

"properties": {
  "build_name": { "type": "keyword" },
  "start_ms": { "type": "date" },
  "total_duration_ms": { "type": "long" },    <--
  "projects": {
    "type": "nested",
    "properties": {
      "project_duration_ms": {
        "type": "long",
        "copy_to": "total_duration_ms"        <--
      },
      "project_name": { "type": "keyword" }
    }
  }
}

After reindexing (which is required due to the newly added field), the above query gets simplified to:

{
  "size": 0,
  "aggs": {
    "builds": {
      "terms": {
        "field": "build_name"
      },
      "aggs": {
        "total_durations": {
          "sum": {
            "field": "total_duration_ms"
          }
        }
      }
    }
  }
}