20
votes

I'm trying to filter buckets for nested aggregations.

Mapping:

{
  "dev.directory.3" : {
    "mappings" : {
      "profile" : {
        "properties" : {
          "events" : {
            "type" : "nested",
            "properties" : {
              "id" : {
                "type" : "integer"
              },
              "name" : {
                "type" : "string",
                "index" : "not_analyzed"
              },
            }
          },
          "title" : {
            "type" : "string"
          }
        }
      }
    }
  }
}

Index data:

"hits" : {
    "total" : 1,
    "max_score" : 1.0,
    "hits" : [ {
      "_index" : "dev.directory.3",
      "_type" : "profile",
      "_id" : "1",
      "_score" : 1.0,
      "_source" : {
        "title" : "Project manager",
        "events" : [ 
          {
            "id" : 1,
            "name" : "Event A",
          }, 
          {
            "id" : 2,
            "name" : "Event B",
          },
          {
            "id" : 3,
            "name" : "Event C",
          },
          {
            "id" : 4,
            "name" : "Event D",
          } 
        ],
      }
    }
  ]
}

I'm using this query and aggregation definition

{
    "query": {
        "nested": {
            "path": "events",
            "query": {
                "bool": {
                    "filter": [{
                        "terms": {
                            "events.id": [1, 2]
                        }
                    }]
                }
            },
            "inner_hits": {}
        }
    },
    "aggs": {
        "events.name12": {
            "filter": {},
            "aggs": {
                "inner": {
                    "nested": {
                        "path": "events"
                    },
                    "aggs": {
                        "events.name": {
                            "terms": {
                                "field": "events.name"
                            }
                        },
                        "events.name_count": {
                            "cardinality": {
                                "field": "events.name"
                            }
                        }
                    }
                }
            }
        }
    },
    "size": 20,
    "_source": ["email", "company_name", "events"]
}

What I am getting is from aggregation result is:

"aggregations": {
    "events.name12": {
      "doc_count": 2,
      "filtered": {
        "doc_count": 4,
        "events.name": {
          "buckets": [
            {
              "key": "Event A",
              "doc_count": 1
            },
            {
              "key": "Event B",
              "doc_count": 1
            },
            {
              "key": "Event C",
              "doc_count": 1
            },
            {
              "key": "Event D",
              "doc_count": 1
            }
          ]
        },
        "events.name_count": {
          "value": 4
        }
      }
    }

I struggling with filtering buckets list only to provided event ids, so the result should be like:

"aggregations": {
    "events.name12": {
      "doc_count": 2,
      "filtered": {
        "doc_count": 2,
        "events.name": {
          "buckets": [
            {
              "key": "Event A",
              "doc_count": 1
            },
            {
              "key": "Event B",
              "doc_count": 1
            }
          ]
        },
        "events.name_count": {
          "value": 2
        }
      }
    }
1

1 Answers

32
votes

You're almost there, you simply need to add the filter on events.id in your aggregation as well, like this:

{
  "query": {
    "nested": {
      "path": "events",
      "query": {
        "bool": {
          "filter": [
            {
              "terms": {
                "events.id": [
                  1,
                  2
                ]
              }
            }
          ]
        }
      },
      "inner_hits": {}
    }
  },
  "aggs": {
    "events.name12": {
      "nested": {
        "path": "events"
      },
      "aggs": {
        "inner": {
          "filter": {
            "terms": {
              "events.id": [
                1,
                2
              ]
            }
          },
          "aggs": {
            "events.name": {
              "terms": {
                "field": "events.name"
              }
            },
            "events.name_count": {
              "cardinality": {
                "field": "events.name"
              }
            }
          }
        }
      }
    }
  },
  "size": 20,
  "_source": [
    "email",
    "company_name",
    "events"
  ]
}

The reason is that your query will correctly select all documents for which there are nested events with the specified event IDs, however, your aggregation will then work on all nested events from all selected documents. So you need to filter out all nested events from those documents that don't have the right IDs in the aggregation as well.