0
votes

I am new to Elasticsearch and I am looking to perform certain aggregations over the fields from an Elasticsearch 5.x index. I have an index that contains the documents with fields langs (which have nested structure) and docLang. These are dynamically mapped fields. Following are the examples documents

DOC 1:

{
   "_index":"A",
   "_type":"document",
   "_id":"1",
   "_source":{
      "text":"This is a test sentence.",
      "langs":{
         "X":{
            "en":1,
            "es":2,
            "zh":3
         },
        "Y":{
            "en":4,
            "es":5,
            "zh":6
         } 
      },
      "docLang": "en"
   }
}

DOC 2:

{
   "_index":"A",
   "_type":"document",
   "_id":"2",
   "_source":{
      "text":"This is a test sentence.",
      "langs":{
         "X":{
            "en":1,
            "es":2
         },
         "Y":{
            "en":3,
            "es":4
         } 
      },
      "docLang": "es"
   }
}

DOC 3:

{
   "_index":"A",
   "_type":"document",
   "_id":"2",
   "_source":{
      "text":"This is a test sentence.",
      "langs":{
         "X":{
            "en":1
         },
         "Y":{
            "en":2
         } 
      },
      "docLang": "en"
   }
}

I want to perform sum aggregation over the langs field in a way that for each key (X/Y) and for each language, I can get the sum across all documents in an index. Also, I want to produce the counts of documents for each type of language from docLang field.

e.g.: For above 3 documents, sum aggregation over langs field would look like below:

"langs":{  
      "X":{  
         "en":3,
         "es":4,
         "zh":3
      },
      "Y":{  
         "en":9,
         "es":9,
         "zh":6
      }
   }

And the docLang count would look like below:

 "docLang":{
    "en" : 2,
    "es" : 1
   }

Also because of some production env restrictions, I cannot use scripts in Elasticsearch. So, I was wondering if it is possible to use just field aggregation type for above fields?

1

1 Answers

1
votes
{
  "size": 0,
  "aggs": {
    "X": {
      "nested": {
        "path": "langs.X"
      },
      "aggs": {
        "X_sum_en": {
          "sum": {
            "field": "langs.X.en"
          }
        },
        "X_sum_es": {
          "sum": {
            "field": "langs.X.es"
          }
        },
        "X_sum_zh": {
          "sum": {
            "field": "langs.X.zh"
          }
        }
      }
    },
    "Y": {
      "nested": {
        "path": "langs.Y"
      },
      "aggs": {
        "Y_sum_en": {
          "sum": {
            "field": "langs.Y.en"
          }
        },
        "Y_sum_es": {
          "sum": {
            "field": "langs.Y.es"
          }
        },
        "Y_sum_zh": {
          "sum": {
            "field": "langs.Y.zh"
          }
        }
      }
    },
    "sum_docLang": {
      "terms": {
        "field": "docLang.keyword",
        "size": 10
      }
    }
  }
}

Since you didn't mention, but I think it's important. I made X and Y as nested fields:

    "langs": {
      "properties": {
        "X": {
          "type": "nested",
          "properties": {
            "en": {
              "type": "long"
            },
            "es": {
              "type": "long"
            },
            "zh": {
              "type": "long"
            }
          }
        },
        "Y": {
          "type": "nested",
          "properties": {
            "en": {
              "type": "long"
            },
            "es": {
              "type": "long"
            },
            "zh": {
              "type": "long"
            }
          }
        }
      }
    }

But, if you fields are not nested at all and here I mean actually the nested field type in Elasticsearch, a simple aggregation like this one should be enough:

{
  "size": 0,
  "aggs": {
    "X_sum_en": {
      "sum": {
        "field": "langs.X.en"
      }
    },
    "X_sum_es": {
      "sum": {
        "field": "langs.X.es"
      }
    },
    "X_sum_zh": {
      "sum": {
        "field": "langs.X.zh"
      }
    },
    "Y_sum_en": {
      "sum": {
        "field": "langs.Y.en"
      }
    },
    "Y_sum_es": {
      "sum": {
        "field": "langs.Y.es"
      }
    },
    "Y_sum_zh": {
      "sum": {
        "field": "langs.Y.zh"
      }
    },
    "sum_docLang": {
      "terms": {
        "field": "docLang.keyword",
        "size": 10
      }
    }
  }
}