0
votes

We have facets showing the number of results that will show when clicking the filters (and combining them). Something like this:

Filters example

Before we introduced nested objects, the following would do the job:

GET /x_v1/_search/
{
  "size": 0,
  "aggs": {
    "FilteredDescriptiveFeatures": {
      "filter": {
        "bool": {
          "must": [
            {
              "terms": {
                "breadcrumbs.categoryIds": [
                  "category"
                ]
              }
            },
            {
              "terms": {
                "products.sterile": [
                  "0"
                ]
              }
            }
          ]
        }
      },
      "aggs": {
        "DescriptiveFeatures": {
          "terms": {
            "field": "products.descriptiveFeatures",
            "size": 1000
          }
        }
      }
    }
  }
}

This gives the result:

  "aggregations": {
    "FilteredDescriptiveFeatures": {
      "doc_count": 280,
      "DescriptiveFeatures": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "somekey",
            "doc_count": 42
          },

We needed to make products a nested object, though, and I'm currently trying to rewrite the above to work with this change. My attempt looks like the following. It doesn't give the correct result, though, and the aggregation doesn't seem to be properly connected to the filter.

GET /x_v2/_search/
{
  "size": 0,
  "aggs": {
    "FilteredDescriptiveFeatures": {
      "filter": {
        "bool": {
          "must": [
            {
              "terms": {
                "breadcrumbs.categoryIds": [
                  "category"
                ]
              }
            },
            {
              "nested": {
                "path": "products",
                "query": {
                  "terms": {
                    "products.sterile": [
                      "0"
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "nested": {
          "nested": {
            "path": "products"
          },
          "aggregations": {
            "DescriptiveFeatures": {
              "terms": {
                "field": "products.descriptiveFeatures",
                "size": 1000
              }
            }
          }
        }
      }
    }
  }
}

This gives the result:

  "aggregations": {
    "FilteredDescriptiveFeatures": {
      "doc_count": 280,
      "nested": {
        "doc_count": 1437,
        "DescriptiveFeatures": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "somekey",
              "doc_count": 164
            },

I've also tried putting the nested definition higher up so that it contains both the filter and the aggs, but then the filter term on breadcrumbs.categoryIds, which is not in the nested object, won't work.

Is what I'm trying to do even possible? And how can it be solved?

2

2 Answers

1
votes

In your FilteredDescriptiveFeatures step, you return all documents that have at least one product with sterile = 0.

But afterwards, in the nested step, you don't specify this filter again. So all nested products of those documents are returned in this step, and your terms aggregation therefore runs on all of their products, not only on the products with sterile = 0.

You should move your sterile filter into the nested step. And, as Richa points out, you need to use a reverse_nested aggregation in the final step to count Elasticsearch documents rather than nested product sub-documents.

Could you try this query?

{
    "size": 0,
    "aggs": {
        "filteredCategory": {
            "filter": {
                "terms": {
                    "breadcrumbs.categoryIds": [
                        "category"
                    ]
                }
            },
            "aggs": {
                "nestedProducts": {
                    "nested": {
                        "path": "products"
                    },
                    "aggs": {
                        "filteredByProductsAttributes": {
                            "filter": {
                                "terms": {
                                    "products.sterile": [
                                        "0"
                                    ]
                                }
                            },
                            "aggs": {
                                "DescriptiveFeatures": {
                                    "terms": {
                                        "field": "products.descriptiveFeatures",
                                        "size": 1000
                                    },
                                    "aggs": {
                                        "productCount": {
                                            "reverse_nested": {}
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
1
votes

What I understand from the description is that you want to filter your results on the basis of some nested and non-nested fields and then apply aggregations on the nested field. I created a sample index and data with some nested and non-nested fields, and wrote a query for it:

Mapping

    PUT stack-557722203
    {
      "mappings": {
        "_doc": {
          "properties": {
            "category": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "user": {
              "type": "nested",       // NESTED FIELD
              "properties": {
                "fName": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "lName": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "type": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                }
              }
            }
          }
        }
      }
    }

Sample Data

    POST _bulk
    {"index":{"_index":"stack-557722203","_id":"1","_type":"_doc"}}
    {"category":"X","user":[{"fName":"A","lName":"B","type":"X"},{"fName":"A","lName":"C","type":"X"},{"fName":"P","lName":"B","type":"Y"}]}
    {"index":{"_index":"stack-557722203","_id":"2","_type":"_doc"}}
    {"category":"X","user":[{"fName":"P","lName":"C","type":"Z"}]}
    {"index":{"_index":"stack-557722203","_id":"3","_type":"_doc"}}
    {"category":"X","user":[{"fName":"A","lName":"C","type":"Y"}]}
    {"index":{"_index":"stack-557722203","_id":"4","_type":"_doc"}}
    {"category":"Y","user":[{"fName":"A","lName":"C","type":"Y"}]}

Query

GET stack-557722203/_search
{
   "size": 0, 
   "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "user",
            "query": {
              "term": {
                "user.fName.keyword": {
                  "value": "A"
                }
              }
            }
          }
        },
        {
          "term": {
            "category.keyword": {
              "value": "X"
            }
          }
        }
      ]
    }
  },

  "aggs": {
    "group BylName": {
      "nested": {
        "path": "user"
      },
      "aggs": {
        "group By lName": {
         "terms": {
           "field": "user.lName.keyword",
           "size": 10
         },
         "aggs": {
           "reverse Nested": {
             "reverse_nested": {}    // NOTE THIS
           }
         }
        }
      }
    }
  }
}

Output

{
  "took": 18,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group BylName": {
      "doc_count": 4,
      "group By lName": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "B",
            "doc_count": 2,
            "reverse Nested": {
              "doc_count": 1
            }
          },
          {
            "key": "C",
            "doc_count": 2,
            "reverse Nested": {
              "doc_count": 2
            }
          }
        ]
      }
    }
  }
}

As for the discrepancy in your data: you get a higher doc_count after changing the mapping to nested because of the way nested and object (non-nested) documents are stored. Each nested object is indexed as a separate hidden document, so an aggregation inside a nested context counts nested objects rather than root documents. See here to understand how they are stored internally. To connect them back to the root document, you can use a reverse_nested aggregation, and then you will get the same result as before.

Hope this helps!!