0
votes

I have an issue regarding combining a nested date field, the date_histogram aggregation and range query.

My whole query looks like this:

{
    "query": {
        "nested": {
            "path": "transactions",
            "query": {
                "range": {
                    "transactions.date": {
                        "gte": "2013",
                        "lte": "2014",
                        "format": "yyyy"
                    }
                }
            }
        }
    },
    "aggs" : {
        "transactions": {
            "nested": {
                "path": "transactions"
            },
            "aggs": {
                "count_per_year": {
                    "date_histogram": {
                        "field": "transactions.date",
                        "interval": "year"
                    }
                }   
            }
        }
    }
}

Which gives the following response (only showing the aggregations):

"aggregations": {
    "transactions": {
        "doc_count": 23302,
        "count_per_year": {
            "buckets": [
                {
                    "key_as_string": "2013-01-01T00:00:00.000Z",
                    "key": 1356998400000,
                    "doc_count": 181
                },
                {
                    "key_as_string": "2014-01-01T00:00:00.000Z",
                    "key": 1388534400000,
                    "doc_count": 8363
                },
                {
                    "key_as_string": "2015-01-01T00:00:00.000Z",
                    "key": 1420070400000,
                    "doc_count": 9621
                },
                {
                    "key_as_string": "2016-01-01T00:00:00.000Z",
                    "key": 1451606400000,
                    "doc_count": 5137
                }
            ]
        }
    }
}

As you can see I still get values for dates outside the specified range. And what is even stranger is that if I remove the query section, I get the following response:

"aggregations": {
    "transactions": {
        "doc_count": 65485,
        "count_per_year": {
            "buckets": [
                {
                    "key_as_string": "2013-01-01T00:00:00.000Z",
                    "key": 1356998400000,
                    "doc_count": 181
                },
                {
                    "key_as_string": "2014-01-01T00:00:00.000Z",
                    "key": 1388534400000,
                    "doc_count": 23009
                },
                {
                    "key_as_string": "2015-01-01T00:00:00.000Z",
                    "key": 1420070400000,
                    "doc_count": 27017
                },
                {
                    "key_as_string": "2016-01-01T00:00:00.000Z",
                    "key": 1451606400000,
                    "doc_count": 15278
                }
            ]
        }
    }
}

As you can see, the doc_count for all buckets (except the one corresponding to the range) have increased, which means that some values are filtered out but not all. I have checked the mapping and transactions.date is of type "date", which also is confirmed by the fact that the date_histogram aggregation is able to put all transaction into buckets. Any idea what I might be doing wrong?

I am currently running Elasticsearch 5.6.3.

1

1 Answers

0
votes

I too faced the same issue, solution i used was to use another filter aggs to narrow down the documents based on date. The following query should solve your issue:

{
    "query": {
        "nested": {
            "path": "transactions",
            "query": {
                "range": {
                    "transactions.date": {
                        "gte": "2013",
                        "lte": "2014",
                        "format": "yyyy"
                    }
                }
            }
        }
    },
    "aggs": {
        "transactions": {
            "nested": {
                "path": "transactions"
            },
            "aggs": {
                "filtering_date_range": {
                    "filter": {
                        "range": {
                            "transactions.date": {
                                "gte": "2013",
                                "lte": "2014",
                                "format": "yyyy"
                            }
                        }
                    }
                },
                "aggs": {
                    "count_per_year": {
                        "date_histogram": {
                            "field": "transactions.date",
                            "interval": "year"
                        }
                    }
                }
            }
        }
    }
}