Elasticsearch aggregation of aggregation

Question

I'd like to know if there is a way to do something akin to bucket_selector but testing based on a key match instead of a numeric metric.

To give a bit more context, here is my use case:

data sample:

[
  {
    "@version": "1",
    "@timestamp": "2017-04-27T04:28:23.589Z",
    "type": "json",
    "headers": {
      "message": {
        "type": "requestactivation"
      }
    },
    "id": "668"
  },
  {
    "@version": "1",
    "@timestamp": "2017-04-27T04:32:23.589Z",
    "type": "json",
    "headers": {
      "message": {
        "type": "requestactivation"
      }
    },
    "id": "669"
  },
  {
    "@version": "1",
    "@timestamp": "2017-04-27T04:30:00.802Z",
    "type": "json",
    "headers": {
      "message": {
        "type": "activationrequested"
      }
    },
    "id": "668"
  }
]

I'd like to retrieve all ids where the last event is of type requestactivation.

I already have an aggregation that retrieves the last event type per id, but I haven't figured out how to filter buckets based on the key

Here is the query:

{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "exists": {
            "field": "id"
          }
        },
        {
          "terms": {
            "headers.message.type": [
              "requestactivation",
              "activationrequested"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "id": {
      "terms": {
        "field": "id",
        "size": 10000
      },
      "aggs": {
        "latest": {
          "max": {
            "field": "@timestamp"
          }
        },
        "hmtype": {
          "terms": {
            "field": "headers.message.type",
            "size": 1
          }
        }
      }
    }
  }
}

Here is a result sample:

{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "id": {
      "doc_count_error_upper_bound": 3,
      "sum_other_doc_count": 46,
      "buckets": [
        {
          "key": "986",
          "doc_count": 4,
          "hmtype": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 2,
            "buckets": [
              {
                "key": "activationrequested",
                "doc_count": 2
              }
            ]
          },
          "latest": {
            "value": 1493238253603,
            "value_as_string": "2017-04-26T20:24:13.603Z"
          }
        },
        {
          "key": "967",
          "doc_count": 2,
          "hmtype": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 1,
            "buckets": [
              {
                "key": "requestactivation",
                "doc_count": 1
              }
            ]
          },
          "latest": {
            "value": 1493191161242,
            "value_as_string": "2017-04-26T07:19:21.242Z"
          }
        },
        {
          "key": "554",
          "doc_count": 7,
          "hmtype": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 5,
            "buckets": [
              {
                "key": "requestactivation",
                "doc_count": 5
              }
            ]
          },
          "latest": {
            "value": 1493200196871,
            "value_as_string": "2017-04-26T09:49:56.871Z"
          }
        }
      ]
    }
  }
}

All mappings are not analyzed (keyword).

Goal would be to reduce results to only the ones where key in bucket is "requestactivation".

The doc count cannot be used as activationrequest can appear more than once for an id.

Only started delving in aggregations recently, so apologies if question seems obvious, examples around didn't seem to match this specific logic.

Andrei Stefan Andrei Stefan · Accepted Answer · 2017-04-27T07:02:07

How about include used in a terms aggregation to "filter" the values included in the terms to only that relevant for the request:

{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "exists": {
            "field": "id"
          }
        },
        {
          "terms": {
            "headers.message.type": [
              "requestactivation",
              "activationrequested"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "id": {
      "terms": {
        "field": "id",
        "size": 10000
      },
      "aggs": {
        "latest": {
          "max": {
            "field": "@timestamp"
          }
        },
        "hmtype": {
          "filter": {
            "terms": {
              "headers.message.type": [
                "requestactivation",
                "activationrequested"
              ]
            }
          },
          "aggs": {
            "count_types": {
              "cardinality": {
                "field": "headers.message.type"
              }
            }
          }
        },
        "filter_buckets": {
          "bucket_selector": {
            "buckets_path": {
              "totalTypes":"hmtype > count_types"
            },
            "script": "params.totalTypes == 2"
          }
        }
      }
    }
  }
}

Elasticsearch aggregation of aggregation

1 Answers