3
votes

I'm trying to get a sorted list of items using a ranged query on a collection containing bulletin-board data. The data structure of a "thread" document is:

{
    "_id" : ObjectId("5a779b47f4fa72412126526a"),
    "title" : "necessitatibus tincidunt libris assueverit",
    "content" : "Corrumpitvenenatis cubilia adipiscing sollicitudin",
    "flagged" : false,
    "locked" : false,
    "sticky" : false,
    "lastPostAt" : ISODate("2018-02-05T06:35:24.656Z"),
    "postCount" : 42,
    "user" : ObjectId("5a779b46f4fa72412126525a"),
    "category" : ObjectId("5a779b31f4fa724121265164"),
    "createdAt" : ISODate("2018-02-04T23:46:15.852Z"),
    "updatedAt" : ISODate("2018-02-05T06:35:24.656Z")
}

The query is:

db.threads.find({
    category: ObjectId('5a779b31f4fa724121265142'), 
    _id : { $gt: ObjectId('5a779b5cf4fa724121269be8') }
}).sort({ sticky: -1, lastPostAt: -1, _id: 1 }).limit(25)

I set up the following indexes to support it:

{ category: 1, _id: 1 }
{ category: 1, _id: 1, sticky: 1, lastPostAt: 1 }
{ sticky: 1, lastPostAt: 1, _id: 1 }

In spite of this, it's still scanning hundreds of documents/keys according to execution stats:

{
    "executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 772,
    "executionTimeMillis" : 17,
    "totalKeysExamined" : 772,
    "totalDocsExamined" : 772,
    "executionStages" : {
        "stage" : "SORT",
        "nReturned" : 772,
        "executionTimeMillisEstimate" : 0,
        "works" : 1547,
        "advanced" : 772,
        "needTime" : 774,
        "needYield" : 0,
        "saveState" : 33,
        "restoreState" : 33,
        "isEOF" : 1,
        "invalidates" : 0,
        "sortPattern" : {
            "sticky" : -1,
            "lastPostAt" : -1,
            "_id" : 1
        },
        "memUsage" : 1482601,
        "memLimit" : 33554432,
        "inputStage" : {
            "stage" : "SORT_KEY_GENERATOR",
            "nReturned" : 772,
            "executionTimeMillisEstimate" : 0,
            "works" : 774,
            "advanced" : 772,
            "needTime" : 1,
            "needYield" : 0,
            "saveState" : 33,
            "restoreState" : 33,
            "isEOF" : 1,
            "invalidates" : 0,
            "inputStage" : {
                "stage" : "FETCH",
                "nReturned" : 772,
                "executionTimeMillisEstimate" : 0,
                "works" : 773,
                "advanced" : 772,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 33,
                "restoreState" : 33,
                "isEOF" : 1,
                "invalidates" : 0,
                "docsExamined" : 772,
                "alreadyHasObj" : 0,
                "inputStage" : {
                    "stage" : "IXSCAN",
                    "nReturned" : 772,
                    "executionTimeMillisEstimate" : 0,
                    "works" : 773,
                    "advanced" : 772,
                    "needTime" : 0,
                    "needYield" : 0,
                    "saveState" : 33,
                    "restoreState" : 33,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "keyPattern" : {
                        "category" : 1,
                        "_id" : 1,
                        "sticky" : 1,
                        "lastPostAt" : 1
                    },
                    "indexName" : "category_1__id_1_sticky_1_lastPostAt_1",
                    "isMultiKey" : false,
                    "multiKeyPaths" : {
                        "category" : [ ],
                        "_id" : [ ],
                        "sticky" : [ ],
                        "lastPostAt" : [ ]
                    },
                    "isUnique" : false,
                    "isSparse" : false,
                    "isPartial" : false,
                    "indexVersion" : 2,
                    "direction" : "forward",
                    "indexBounds" : {
                        "category" : [
                            "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                        ],
                        "_id" : [
                            "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                        ],
                        "sticky" : [
                            "[MinKey, MaxKey]"
                        ],
                        "lastPostAt" : [
                            "[MinKey, MaxKey]"
                        ]
                    },
                    "keysExamined" : 772,
                    "seeks" : 1,
                    "dupsTested" : 0,
                    "dupsDropped" : 0,
                    "seenInvalidated" : 0
                }
            }
        }
    }
}

When I take out the sorting stage, it correctly scans only 25 documents. And the keys examined (772) remains the same no matter which fields I place in the sort function.

Here is the full explain() for the sorted query:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "database.threads",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [
                {
                    "category" : {
                        "$eq" : ObjectId("5a779b31f4fa724121265142")
                    }
                },
                {
                    "_id" : {
                        "$gt" : ObjectId("5a779b5cf4fa724121269be8")
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "SORT",
            "sortPattern" : {
                "sticky" : -1,
                "lastPostAt" : -1,
                "_id" : 1
            },
            "limitAmount" : 25,
            "inputStage" : {
                "stage" : "SORT_KEY_GENERATOR",
                "inputStage" : {
                    "stage" : "FETCH",
                    "inputStage" : {
                        "stage" : "IXSCAN",
                        "keyPattern" : {
                            "category" : 1,
                            "_id" : 1,
                            "sticky" : 1,
                            "lastPostAt" : 1
                        },
                        "indexName" : "category_1__id_1_sticky_1_lastPostAt_1",
                        "isMultiKey" : false,
                        "multiKeyPaths" : {
                            "category" : [ ],
                            "_id" : [ ],
                            "sticky" : [ ],
                            "lastPostAt" : [ ]
                        },
                        "isUnique" : false,
                        "isSparse" : false,
                        "isPartial" : false,
                        "indexVersion" : 2,
                        "direction" : "forward",
                        "indexBounds" : {
                            "category" : [
                                "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                            ],
                            "_id" : [
                                "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                            ],
                            "sticky" : [
                                "[MinKey, MaxKey]"
                            ],
                            "lastPostAt" : [
                                "[MinKey, MaxKey]"
                            ]
                        }
                    }
                }
            }
        },
        "rejectedPlans" : [
            {
                "stage" : "SORT",
                "sortPattern" : {
                    "sticky" : -1,
                    "lastPostAt" : -1,
                    "_id" : 1
                },
                "limitAmount" : 25,
                "inputStage" : {
                    "stage" : "SORT_KEY_GENERATOR",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "filter" : {
                            "_id" : {
                                "$gt" : ObjectId("5a779b5cf4fa724121269be8")
                            }
                        },
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "category" : 1
                            },
                            "indexName" : "category_1",
                            "isMultiKey" : false,
                            "multiKeyPaths" : {
                                "category" : [ ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "category" : [
                                    "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                                ]
                            }
                        }
                    }
                }
            },
            {
                "stage" : "SORT",
                "sortPattern" : {
                    "sticky" : -1,
                    "lastPostAt" : -1,
                    "_id" : 1
                },
                "limitAmount" : 25,
                "inputStage" : {
                    "stage" : "SORT_KEY_GENERATOR",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "category" : 1,
                                "_id" : 1
                            },
                            "indexName" : "category_1__id_1",
                            "isMultiKey" : false,
                            "multiKeyPaths" : {
                                "category" : [ ],
                                "_id" : [ ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "category" : [
                                    "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                                ],
                                "_id" : [
                                    "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                                ]
                            }
                        }
                    }
                }
            },
            {
                "stage" : "SORT",
                "sortPattern" : {
                    "sticky" : -1,
                    "lastPostAt" : -1,
                    "_id" : 1
                },
                "limitAmount" : 25,
                "inputStage" : {
                    "stage" : "SORT_KEY_GENERATOR",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "filter" : {
                            "category" : {
                                "$eq" : ObjectId("5a779b31f4fa724121265142")
                            }
                        },
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "_id" : 1
                            },
                            "indexName" : "_id_",
                            "isMultiKey" : false,
                            "multiKeyPaths" : {
                                "_id" : [ ]
                            },
                            "isUnique" : true,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "_id" : [
                                    "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                                ]
                            }
                        }
                    }
                }
            }
        ]
    },
    "serverInfo" : {
        "host" : "CRF-MBP.local",
        "port" : 27017,
        "version" : "3.6.2",
        "gitVersion" : "489d177dbd0f0420a8ca04d39fd78d0a2c539420"
    },
    "ok" : 1
}

And here is the full explain() for the non-sorted query:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "database.threads",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [
                {
                    "category" : {
                        "$eq" : ObjectId("5a779b31f4fa724121265142")
                    }
                },
                {
                    "_id" : {
                        "$gt" : ObjectId("5a779b5cf4fa724121269be8")
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "LIMIT",
            "limitAmount" : 25,
            "inputStage" : {
                "stage" : "FETCH",
                "inputStage" : {
                    "stage" : "IXSCAN",
                    "keyPattern" : {
                        "category" : 1,
                        "_id" : 1,
                        "sticky" : 1,
                        "lastPostAt" : 1
                    },
                    "indexName" : "category_1__id_1_sticky_1_lastPostAt_1",
                    "isMultiKey" : false,
                    "multiKeyPaths" : {
                        "category" : [ ],
                        "_id" : [ ],
                        "sticky" : [ ],
                        "lastPostAt" : [ ]
                    },
                    "isUnique" : false,
                    "isSparse" : false,
                    "isPartial" : false,
                    "indexVersion" : 2,
                    "direction" : "forward",
                    "indexBounds" : {
                        "category" : [
                            "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                        ],
                        "_id" : [
                            "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                        ],
                        "sticky" : [
                            "[MinKey, MaxKey]"
                        ],
                        "lastPostAt" : [
                            "[MinKey, MaxKey]"
                        ]
                    }
                }
            }
        },
        "rejectedPlans" : [
            {
                "stage" : "LIMIT",
                "limitAmount" : 25,
                "inputStage" : {
                    "stage" : "FETCH",
                    "filter" : {
                        "_id" : {
                            "$gt" : ObjectId("5a779b5cf4fa724121269be8")
                        }
                    },
                    "inputStage" : {
                        "stage" : "IXSCAN",
                        "keyPattern" : {
                            "category" : 1
                        },
                        "indexName" : "category_1",
                        "isMultiKey" : false,
                        "multiKeyPaths" : {
                            "category" : [ ]
                        },
                        "isUnique" : false,
                        "isSparse" : false,
                        "isPartial" : false,
                        "indexVersion" : 2,
                        "direction" : "forward",
                        "indexBounds" : {
                            "category" : [
                                "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                            ]
                        }
                    }
                }
            },
            {
                "stage" : "LIMIT",
                "limitAmount" : 25,
                "inputStage" : {
                    "stage" : "FETCH",
                    "inputStage" : {
                        "stage" : "IXSCAN",
                        "keyPattern" : {
                            "category" : 1,
                            "_id" : 1
                        },
                        "indexName" : "category_1__id_1",
                        "isMultiKey" : false,
                        "multiKeyPaths" : {
                            "category" : [ ],
                            "_id" : [ ]
                        },
                        "isUnique" : false,
                        "isSparse" : false,
                        "isPartial" : false,
                        "indexVersion" : 2,
                        "direction" : "forward",
                        "indexBounds" : {
                            "category" : [
                                "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
                            ],
                            "_id" : [
                                "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                            ]
                        }
                    }
                }
            },
            {
                "stage" : "LIMIT",
                "limitAmount" : 25,
                "inputStage" : {
                    "stage" : "FETCH",
                    "filter" : {
                        "category" : {
                            "$eq" : ObjectId("5a779b31f4fa724121265142")
                        }
                    },
                    "inputStage" : {
                        "stage" : "IXSCAN",
                        "keyPattern" : {
                            "_id" : 1
                        },
                        "indexName" : "_id_",
                        "isMultiKey" : false,
                        "multiKeyPaths" : {
                            "_id" : [ ]
                        },
                        "isUnique" : true,
                        "isSparse" : false,
                        "isPartial" : false,
                        "indexVersion" : 2,
                        "direction" : "forward",
                        "indexBounds" : {
                            "_id" : [
                                "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
                            ]
                        }
                    }
                }
            }
        ]
    },
    "serverInfo" : {
        "host" : "CRF-MBP.local",
        "port" : 27017,
        "version" : "3.6.2",
        "gitVersion" : "489d177dbd0f0420a8ca04d39fd78d0a2c539420"
    },
    "ok" : 1
}

Does anyone have any idea why this might not fully use an index?

2
Could you post the explain() output of both the sorted and non-sorted queries?kevinadi
Also, the query seems to return 772 documents (from the nReturned number). Is this the correct number of documents you expect? Does the query without sorting only returns 25 documents?kevinadi
No, I'm only trying to retrieve the first 25 documents. The queyr without sorting does indeed only return 25 documents. I've updated my post with more information. Thanks for your help!cfitzarl
Well, it's working as it should. You are sorting before the limit, so it sorts the complete collection, then picks 25 from the sorted result.Ayush Gupta
It doesn't matter whether I sort before the limit or after (I've tried both), it always scans through 772 documents. I'm just curious if there's some way to at least reduce this number.cfitzarl

2 Answers

11
votes

The issue is that none of your indexes actually help with the sorted query. This is the reason for the high number of scanned objects and the presence of SORT_KEY_GENERATOR stage (in-memory sort, limited to 32MB).

The non-sorted query, on the other hand, can use either the { category: 1, _id: 1 } or { category: 1, _id: 1, sticky: 1, lastPostAt: 1 } indexes. Note that it's perfectly valid to use either one, since one contains the prefix of the other. See Prefixes for more details.

MongoDB find() queries typically uses only one index, so a single compound index should cater for all the parameters of your query. This would include both the parameters of find() and sort().

A good writeup of how your index should be created is available in Optimizing MongoDB Compound Indexes. Let's take the main point of the article, where the compound index ordering should be equality --> sort --> range:

Your query "shape" is:

db.collection.find({category:..., _id: {$gt:...}})
             .sort({sticky:-1, lastPostAt:-1, _id:1})
             .limit(25)

We see that:

  • category:... is equality
  • sticky:-1, lastPostAt:-1, _id:1 is sort
  • _id: {$gt:...} is range

So the compound index you need is:

{category:1, sticky:-1, lastPostAt:-1, _id:1}

Where the winning plan of the explain() output of your query with the above index shows:

"winningPlan": {
      "stage": "LIMIT",
      "limitAmount": 25,
      "inputStage": {
        "stage": "FETCH",
        "inputStage": {
          "stage": "IXSCAN",
          "keyPattern": {
            "category": 1,
            "sticky": -1,
            "lastPostAt": -1,
            "_id": 1
          },
          "indexName": "category_1_sticky_-1_lastPostAt_-1__id_1",
          "isMultiKey": false,
          "multiKeyPaths": {
            "category": [ ],
            "sticky": [ ],
            "lastPostAt": [ ],
            "_id": [ ]
          },
          "isUnique": false,
          "isSparse": false,
          "isPartial": false,
          "indexVersion": 2,
          "direction": "forward",
          "indexBounds": {
            "category": [
              "[ObjectId('5a779b31f4fa724121265142'), ObjectId('5a779b31f4fa724121265142')]"
            ],
            "sticky": [
              "[MaxKey, MinKey]"
            ],
            "lastPostAt": [
              "[MaxKey, MinKey]"
            ],
            "_id": [
              "(ObjectId('5a779b5cf4fa724121269be8'), ObjectId('ffffffffffffffffffffffff')]"
            ]
          }
        }
      }
    }

Note that the winning plan doesn't contain a SORT_KEY_GENERATOR stage. This means that the index can be fully utilized to respond to the sorted query.

1
votes

I believe there are 2 problems both having to do with your sort. These problems come straight from the documentations but if you would comment I'll help explain (and might possibly learn something myself)

The first and biggest problem is that you must sort in the order given by the index. From docs:

You can specify a sort on all the keys of the index or on a subset; however, the sort keys must be listed in the same order as they appear in the index. For example, an index key pattern { a: 1, b: 1 } can support a sort on { a: 1, b: 1 } but not on { b: 1, a: 1 }.

This means that you must sort in the order given by your winning plan: category, _id, sticky, lastPostAt (or any prefix of that order such as category, _id, sticky or category _id). If not mongodb will identify the 772 docs which are indexed using your winning plan, but will then have to comb through each key in order to assess values and provide the desired sort order. If you want to sort by the order you are curenttly querying must provide a index in that order:

The second problem is that you must sort in the direction that you provided by the index (or the inverse direction).

For a query to use a compound index for a sort, the specified sort direction for all keys in the cursor.sort() document must match the index key pattern or match the inverse of the index key pattern. For example, an index key pattern { a: 1, b: -1 } can support a sort on { a: 1, b: -1 } and { a: -1, b: 1 } but not on { a: -1, b: -1 } or {a: 1, b: 1}.

Because your indexes are all in ascending order, you would have to either sort in ascending order for all indexes, or descending order for all indexes. If not we run into the same problem in which mongo finds all the relevant docs, but has to comb through them to provide the desired order.

I believe you would get imporved functionality by providing an additional index of:

{ sticky: -1, lastPostAt: -1, _id: 1 }

or its inverse:

{ sticky: 1, lastPostAt: 1, _id: -1 }

This would create a situation where mongo uses your first index

{ category: 1, _id: 1 }

To identify potential unsorted documents, then uses the one of the new index (provided above) since they would already be sorted. Then the limit would take care of giving you your 25 docs.

I'm pretty sure this would created a covered query (a query with no docs examined). Let me know how it goes, cheers!