2
votes

MongoDB Aggregation Framework Query: $group, $project, $addFields with $reduce.

Use Case: I have the multiple documents with nested documents array in the collection, need a result group by and the sum of each group item as cumulative volume. Also, have match parameter on year(date), if year matches then only that year document should group by and the sum of volume(nested document array) return.

Below are the documents in the collection:

{
    "_id": "1",
    "LSD": {
        "name": "TDL 05",
        "LSDNumber": "031"
    },
    "POD": [{           
            "Volume": 35.40,
            "VolUnit": "m3"
        },
        {           
            "Volume": 20.75,
            "VolUnit": "m3"
        },
        {
            "Volume": 15,
            "VolUnit": "m3"
        }
    ],
     "createdon": {
        "$date": "2014-08-02T18:49:17.000Z"
    }
},
{
    "_id": "2",
    "LSD": {
        "name": "Stock Watering",
        "LSDNumber": "01"
    },

    "POD": [{
            "Volume": 105,
            "VolUnit": "m3"
        },
        {
            "Volume": 70,
            "VolUnit": "m3"
        },
        {
            "Volume": 35,
            "VolUnit": "m3"
        }
    ],
     "createdon": {
        "$date": "2014-08-02T18:49:17.000Z"
    }
},
{
    "_id": "3",
    "LSD": {
        "name": "TDL 30 Stock Water",
        "LSDNumber": "030"
    },

    "POD": [{
        "Volume": 87,
        "VolUnit": "m3"
    }],
     "createdon": {
        "$date": "2019-08-02T18:49:17.000Z"
    }
},
{
    "_id": "4",
    "LSD": {
        "name": "TDL 30 Stock Water",
        "LSDNumber": "030"
    },
    "POD": [{
        "Volume": 25.12,
        "VolUnit": "m3"
    }],
     "createdon": {
        "$date": "2019-08-02T18:49:17.000Z"
    }
},
{
    "_id": "5",
    "LSD": {
        "name": "TDL 05",
        "LSDNumber": "031"
    },
    "POD": [
        {
            "Volume": 21,
            "VolUnit": "m3"
        }
    ],
     "createdon": {
        "$date": "2014-08-02T18:49:17.000Z"
    }
}

I have a query (C# Driver 2.0), group by "LSD.LSDNumber" and sum of "POD.Volume". No match parameter added here. This works fine.

Query:

{
    aggregate([{
        "$group": {
            "_id": "$LSD.LSDNumber",            
            "doc": {
                "$push": "$POD"
            },
            "data": {
                "$first": "$$ROOT"
            }
        }
    }, {
        "$addFields": {
            "LSDNumber": "$_id",            
            "GroupByDocCount": {
                "$size": "$doc"
            },
            "Cumulative": {
                "$reduce": {
                    "input": "$doc",
                    "initialValue": [],
                    "in": {
                        "$concatArrays": ["$$value", "$$this"]
                    }
                }
            }
        }
    }, {
        "$project": {
            "LSDNumber": 1,
            "GroupByDocCount": 1,           
            "CumulativeVol": {
                "$sum": "$Cumulative.Volume"
            }
        }
    }])
}

Below is the result.

{    
    "LSDNumber":"031",
    "GroupByDocCount": 2,
    "CumulativeVol": 92.15
},
{    
    "LSDNumber":"030",
    "GroupByDocCount": 2,
    "CumulativeVol": 112.12
},
{    
    "LSDNumber":"01",
    "GroupByDocCount": 1,
    "CumulativeVol": 210
}

However, I'd like to get the document match by year(on "createdon") date along with group by (LSD.LSDNumber) and the sum of volume (POD.Volume). For example, if the year is 2014 then bellow should be the result.

{    
    "LSDNumber":"031",
    "GroupByDocCount": 2,
    "CumulativeVol": 92.15,
    "Year": 2014
},
{    
    "LSDNumber":"01",
    "GroupByDocCount": 1,
    "CumulativeVol": 210,
    "Year": 2014
}

The query that I'm trying always returns nothing.

{
    aggregate([{
        "$project": {
            "LSDNumber": 1,
            "GroupByDocCount": 1,
            "CumulativeVol": {
                "$sum": "$Cumulative.Volume"
            },
            "year": {
                "$year": "$data.createdon"
            }
        }
    }, {
        "$match": {
            "year": 2014
        }
    }, {
        "$group": {
            "_id": "$LSD.LSDNumber",
            "year": {
                "$first": "$year"
            },
            "doc": {
                "$push": "$POD"
            },
            "data": {
                "$first": "$$ROOT"
            }
        }
    }, {
        "$addFields": {
            "LSDNumber": "$_id",
            "yearCreate": "$year",
            "GroupByDocCount": {
                "$size": "$doc"
            },
            "Cumulative": {
                "$reduce": {
                    "input": "$doc",
                    "initialValue": [],
                    "in": {
                        "$concatArrays": ["$$value", "$$this"]
                    }
                }
            }
        }
    }])
}

What's going wrong here. Any help would be appreciate!!

2
Why is the $data in "$year": "$data.createdon" in the project? Should it be just $createdon? - DaveStSomeWhere
@DaveStSomeWhere Sorry to the late reply. Anyways, corrected $createdon with $data.createdon but same result. - Harshal Yelpale

2 Answers

2
votes

You may add Year variable in $addField pipeline, then $match.

{
    "$group": {
        "_id": "$LSD.LSDNumber",            
        "doc": {
            "$push": "$POD"
        },
        "data": {
            "$first": "$$ROOT"
        }
    }
}, {
    "$addFields": {
        "LSDNumber": "$_id",            
        "GroupByDocCount": {
            "$size": "$doc"
        },
        "Cumulative": {
            "$reduce": {
                "input": "$doc",
                "initialValue": [],
                "in": {
                    "$concatArrays": ["$$value", "$$this"]
                }
            }
        },
        "Year": {
            "$year": "$data.createdon"
        }
    }
}, {
    "$match" : {"Year" : 2014}
}, {
    "$project": {
        "LSDNumber": 1,
        "GroupByDocCount": 1,           
        "CumulativeVol": {
            "$sum": "$Cumulative.Volume"
        },
        "Year" : "$Year"
    }
}

=== Result ===

/* 1 */
{
    "_id" : "01",
    "LSDNumber" : "01",
    "GroupByDocCount" : 1,
    "CumulativeVol" : 210,
    "Year" : 2014
}

/* 2 */
{
    "_id" : "031",
    "LSDNumber" : "031",
    "GroupByDocCount" : 2,
    "CumulativeVol" : 92.15,
    "Year" : 2014
}
1
votes

A bit late, but here's my answer. We just need to add one more project stage(extra) to the pipeline at the end-stage. However, @Valijon's answer meets the same requirement.

{
    aggregate([{
        "$project": {
            "LSDNumber": "$LSD.LSDNumber",
            "year": {
                "$year": "$createdon"
            },
            "PointOfDiversionVolumeDetails": 1
        }
    }, {
        "$match": {
            "year": 2014
        }
    }, {
        "$group": {
            "_id": "$LSDNumber",
            "doc": {
                "$push": "$PointOfDiversionVolumeDetails"
            }
        }
    }, {
        "$addFields": {
            "GroupByDocCount": {
                "$size": "$doc"
            },
            "Cumulative": {
                "$reduce": {
                    "input": "$doc",
                    "initialValue": [],
                    "in": {
                        "$concatArrays": ["$$value", "$$this"]
                    }
                }
            }
        }
    }, {
        "$project": {
            "CumulativeVol": {
                "$sum": "$Cumulative.Volume"
            },
            "LSDNumber": 1,
            "GroupByDocCount": 1
        }
    }, {
        "$sort": {
            "GroupByDocCount": -1
        }
    }])
}