MongoDB Indexes

czyli po co komu B-drzewa

O czym nie będzie

  • indeksy tekstowe
  • indeksy geoprzestrzenne
  • indeksy geoprzestrzenne płaskie
  • .hint

API

> db.collection.createIndex(keys, {
>   background:              boolean,
>   collation:               document,
>   expireAfterSeconds:      integer,
>   name:                    string,
>   partialFilterExpression: document,
>   sparse:                  boolean,
>   storageEngine:           document,
>   unique:                  boolean
> })

> db.collection.dropIndex(index)

> db.collection.dropIndexes()

> db.collection.explain()

> db.collection.getIndexes()

> db.collection.reIndex()

> db.collection.totalIndexSize()

API w Meteorze

> Collection._ensureIndex(index, options)

> Collection._dropIndex(index)

> Collection.rawCollection()

Korzyść: wydajność

Korzyść: wydajność

B-drzewa!

Korzyść: wydajność

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find({_id: 'non-existing'})


> db.getCollection('reservations').find({'non-existing': 'non-existing'})


> db.getCollection('reservations').find({firstName: 'non-existing'})

Korzyść: wydajność

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find({_id: 'non-existing'})
Fetched 0 record(s) in 86ms

> db.getCollection('reservations').find({'non-existing': 'non-existing'})


> db.getCollection('reservations').find({firstName: 'non-existing'})

Korzyść: wydajność

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find({_id: 'non-existing'})
Fetched 0 record(s) in 86ms

> db.getCollection('reservations').find({'non-existing': 'non-existing'})
Fetched 0 record(s) in 1334ms

> db.getCollection('reservations').find({firstName: 'non-existing'})

Korzyść: wydajność

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find({_id: 'non-existing'})
Fetched 0 record(s) in 86ms

> db.getCollection('reservations').find({'non-existing': 'non-existing'})
Fetched 0 record(s) in 1334ms

> db.getCollection('reservations').find({firstName: 'non-existing'})
Fetched 0 record(s) in 2121ms

Korzyść: wydajność 2

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find().sort({_id: 1})


> db.getCollection('reservations').find().sort({firstName: 1})

Korzyść: wydajność 2

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find().sort({_id: 1})
3377ms

> db.getCollection('reservations').find().sort({firstName: 1})

Korzyść: wydajność 2

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find().sort({_id: 1})
3377ms

> db.getCollection('reservations').find().sort({firstName: 1})
186ms

Korzyść: wydajność 2

> db.getCollection('reservations').count()
430327

> db.getCollection('reservations').getIndexes()
[{
  "v" : 1,
  "key" : {
    "_id" : 1
  },
  "name" : "_id_",
  "ns" : "aleno_mytools_test.reservations"
}, ...]

> db.getCollection('reservations').find().sort({_id: 1})
3377ms

> db.getCollection('reservations').find().sort({firstName: 1})
186ms
Error: error: {
	"waitedMS" : NumberLong(0),
	"ok" : 0,
	"errmsg" : "Executor error during find command: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
	"code" : 96
}

Korzyść: utrzymanie

> db.getCollection('errorMetrics').findOne({})
{
    "_id" : {
        "appId" : "rGKEqNyLcBbkEwLmy",
        "name" : "Clock discrepancy detected. Attempting re-sync.",
        "type" : "client",
        "subType" : "meteor._debug",
        "time" : ISODate("2017-09-11T14:21:00.000Z"),
        "res" : "1min"
    },
    "value" : {
        "appId" : "rGKEqNyLcBbkEwLmy",
        "name" : "Clock discrepancy detected. Attempting re-sync.",
        "type" : "client",
        "subType" : "meteor._debug",
        "startTime" : ISODate("2017-09-11T14:21:00.000Z"),
        "count" : 3.0,
        "res" : "1min",
        "subShard" : 65.0,
        "_expires" : ISODate("2017-12-17T14:21:46.872Z")
    }
}

Korzyść: utrzymanie

> db.getCollection('errorMetrics').createIndex(
>   {'value._expires': 1},
>   {background: true, expireAfterSeconds: 0}
> );

Wada: rozmiar

> db.getCollection('reservations').stats()
{
    "ns" : "aleno_mytools_test.reservations",
    "count" : 430327,
    "size" : 785148392,
    "avgObjSize" : 1824,
    "storageSize" : 279040000,
    ...
    "nindexes" : 9,
    "totalIndexSize" : 70324224,
    "indexSizes" : {
        "_id_" : 22392832,
        "mandrill._id_1" : 8515584,
        "customerId_1" : 10149888,
        "restaurantId_1_fullSelectedDate_1" : 4120576,
        "shift_1_selectedDate_1_number_-1" : 4956160,
        "state_1_selectedDate_1_restaurantId_1_shift_1" : 6377472,
        "restaurantId_1_selectedDate_1" : 3293184,
        "restaurantId_1_fullSelectedDate_1_state_1" : 4759552,
        "shift_1_selectedDate_1_state_1_restaurantId_1" : 5758976
    },
    "ok" : 1.0
}

Wada: rozmiar

> db.getCollection('reservations').stats(1024 * 1024)
{
    "ns" : "aleno_mytools_test.reservations",
    "count" : 430327,
    "size" : 748,
    "avgObjSize" : 1824,
    "storageSize" : 266,
    ...
    "nindexes" : 9,
    "totalIndexSize" : 67,
    "indexSizes" : {
        "_id_" : 21,
        "mandrill._id_1" : 8,
        "customerId_1" : 9,
        "restaurantId_1_fullSelectedDate_1" : 3,
        "shift_1_selectedDate_1_number_-1" : 4,
        "state_1_selectedDate_1_restaurantId_1_shift_1" : 6,
        "restaurantId_1_selectedDate_1" : 3,
        "restaurantId_1_fullSelectedDate_1_state_1" : 4,
        "shift_1_selectedDate_1_state_1_restaurantId_1" : 5
    },
    "ok" : 1.0
}

Kiedy?

> db.getCollection('reservations').find({_id: 1}).explain()
{
    "queryPlanner" : {
        ...
        "parsedQuery" : {
            "_id" : {
                "$eq" : 1.0
            }
        },
        "winningPlan" : {
            "stage" : "IDHACK"
        }
    },
    ...
    "ok" : 1.0
}

Kiedy?

> db.getCollection('reservations').find({'mandrill._id': 1}).explain()
{
    "queryPlanner" : {
        ...
        "parsedQuery" : {
            "mandrill._id" : {
                "$eq" : 1.0
            }
        },
        "winningPlan" : {
            "stage" : "FETCH",
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "mandrill._id" : 1
                },
                "indexName" : "mandrill._id_1",
                "isMultiKey" : false,
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 1,
                "direction" : "forward",
                "indexBounds" : {
                    "mandrill._id" : [ 
                        "[1.0, 1.0]"
                    ]
                }
            }
        }
    },
    ...
    "ok" : 1.0
}

Kiedy?

> db.getCollection('reservations').find({'non-existing': 1}).explain()
{
    "queryPlanner" : {
        ...
        "parsedQuery" : {
            "non-existing" : {
                "$eq" : 1.0
            }
        },
        "winningPlan" : {
            "stage" : "COLLSCAN",
            "filter" : {
                "non-existing" : {
                    "$eq" : 1.0
                }
            },
            "direction" : "forward"
        }
    },
    ...
    "ok" : 1.0
}

Kiedy?

> db.getCollection('reservations').find({restaurantId: 1}).explain()
{
    "queryPlanner" : {
        ...
        "parsedQuery" : {
            "restaurantId" : {
                "$eq" : 1.0
            }
        },
        "winningPlan" : {
            "stage" : "FETCH",
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "restaurantId" : 1,
                    "fullSelectedDate" : 1
                },
                "indexName" : "restaurantId_1_fullSelectedDate_1",
                "isMultiKey" : false,
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 1,
                "direction" : "forward",
                "indexBounds" : {
                    "restaurantId" : [ 
                        "[1.0, 1.0]"
                    ],
                    "fullSelectedDate" : [ 
                        "[MinKey, MaxKey]"
                    ]
                }
            }
        }
    },
    ...
    "ok" : 1.0
}

Kiedy?

> db.getCollection('reservations').find({_id: 1, restaurantId: 1}).explain()
{
    "queryPlanner" : {
        ...
        "parsedQuery" : {
            "$and" : [ 
                {
                    "_id" : {
                        "$eq" : 1.0
                    }
                }, 
                {
                    "restaurantId" : {
                        "$eq" : 1.0
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "FETCH",
            "filter" : {
                "restaurantId" : {
                    "$eq" : 1.0
                }
            },
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "_id" : 1
                },
                "indexName" : "_id_",
                "isMultiKey" : false,
                "isUnique" : true,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 1,
                "direction" : "forward",
                "indexBounds" : {
                    "_id" : [ 
                        "[1.0, 1.0]"
                    ]
                }
            }
        }
    },
    ...
    "ok" : 1.0
}

Bonus: statystyki

> db.getCollection('errorMetrics').aggregate([{$indexStats: {}}])
[{
    "name" : "_id_",
    "key" : {
        "_id" : 1
    },
    "host" : "4fbb765d4e11:27017",
    "accesses" : {
        "ops" : NumberLong(2898585),
        "since" : ISODate("2017-10-05T12:49:37.546Z")
    }
}, {
    "name" : "value.res_1_value.appId_1_value.startTime_1",
    "key" : {
        "value.res" : 1.0,
        "value.appId" : 1.0,
        "value.startTime" : 1.0
    },
    "host" : "4fbb765d4e11:27017",
    "accesses" : {
        "ops" : NumberLong(157275),
        "since" : ISODate("2017-10-06T21:48:44.367Z")
    }
}, ...]

MongoDB Indexes

By Radosław Miernik

MongoDB Indexes

Vazco TechMeeting 2017-11-06

  • 971