Local environment for MongoDB sharding


The purpose of this topic is to set up a local MongoDB sharded cluster with 2 shards, and then run various commands to observe how the system actually behaves.

Environment Setup
Sharding Setup
DB Setup
Playing around
Configuring zones

Setup

The setup will have:

  • 2 replica sets (one per shard), each with one primary and 2 secondaries.
  • 2 routers (mongos).
  • 3 config servers (forming one config replica set).

    docker-compose.yaml
    # Local sharded MongoDB cluster: 2 shard replica sets (3 members each),
    # 1 config server replica set (3 members) and 2 mongos routers.
    #
    # The image is pinned to 5.0 on every service: the commands in this guide
    # use the legacy `mongo` shell, which the official image no longer ships
    # from 6.0 onwards, and all members of a replica set should run the same
    # server version.
    version: '3'
    services:
      # Shard 1: replica set "mongors1".
      # Each member listens on 27017 inside its container and is published on
      # its own host port.
      mongors1n1:
        container_name: mongors1n1
        image: mongo:5.0
        command: mongod --shardsvr --replSet mongors1 --dbpath /data/db --port 27017
        ports:
          - "27017:27017"
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/data1:/data/db

      mongors1n2:
        container_name: mongors1n2
        image: mongo:5.0
        command: mongod --shardsvr --replSet mongors1 --dbpath /data/db --port 27017
        ports:
          - "27027:27017"
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/data2:/data/db

      mongors1n3:
        container_name: mongors1n3
        image: mongo:5.0
        command: mongod --shardsvr --replSet mongors1 --dbpath /data/db --port 27017
        ports:
          - "27037:27017"
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/data3:/data/db

      # Shard 2: replica set "mongors2".
      mongors2n1:
        container_name: mongors2n1
        image: mongo:5.0
        command: mongod --shardsvr --replSet mongors2 --dbpath /data/db --port 27017
        ports:
          - "27047:27017"
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/data4:/data/db

      mongors2n2:
        container_name: mongors2n2
        image: mongo:5.0
        command: mongod --shardsvr --replSet mongors2 --dbpath /data/db --port 27017
        ports:
          - "27057:27017"
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/data5:/data/db

      mongors2n3:
        container_name: mongors2n3
        image: mongo:5.0
        command: mongod --shardsvr --replSet mongors2 --dbpath /data/db --port 27017
        ports:
          - "27067:27017"
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/data6:/data/db

      # Config servers: replica set "mongors1conf".
      # Not published on the host; they are only reached over the compose network.
      mongocfg1:
        container_name: mongocfg1
        image: mongo:5.0
        command: mongod --configsvr --replSet mongors1conf --dbpath /data/db --port 27017
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/config1:/data/db

      mongocfg2:
        container_name: mongocfg2
        image: mongo:5.0
        command: mongod --configsvr --replSet mongors1conf --dbpath /data/db --port 27017
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/config2:/data/db

      mongocfg3:
        container_name: mongocfg3
        image: mongo:5.0
        command: mongod --configsvr --replSet mongors1conf --dbpath /data/db --port 27017
        expose:
          - "27017"
        volumes:
          - ~/mongo_cluster/config3:/data/db

      # mongos routers: both point at the full config replica set, so they are
      # started after all three config server containers.
      mongos1:
        container_name: mongos1
        image: mongo:5.0
        depends_on:
          - mongocfg1
          - mongocfg2
          - mongocfg3
        command: mongos --configdb mongors1conf/mongocfg1:27017,mongocfg2:27017,mongocfg3:27017 --port 27017
        ports:
          - "27019:27017"
        expose:
          - "27017"

      mongos2:
        container_name: mongos2
        image: mongo:5.0
        depends_on:
          - mongocfg1
          - mongocfg2
          - mongocfg3
        command: mongos --configdb mongors1conf/mongocfg1:27017,mongocfg2:27017,mongocfg3:27017 --port 27017
        ports:
          - "27020:27017"
        expose:
          - "27017"
                                            

    # start all services defined in the docker-compose.yaml above
    $ docker-compose up
                                            

Sharding Setup


    # NOTE: the inner double quotes must be escaped (\"), otherwise the host shell
    # strips them and the mongo shell receives bare identifiers (ReferenceError).

    # initiate the config server replica set
    $ docker exec -it mongocfg1 bash -c "echo 'rs.initiate({_id: \"mongors1conf\", configsvr: true, members: [{ _id : 0, host : \"mongocfg1\" },{ _id : 1, host : \"mongocfg2\" }, { _id : 2, host : \"mongocfg3\" }]})' | mongo"

    # initiate the two shard replica sets
    $ docker exec -it mongors1n1 bash -c "echo 'rs.initiate({_id : \"mongors1\", members: [{ _id : 0, host : \"mongors1n1\" },{ _id : 1, host : \"mongors1n2\" },{ _id : 2, host : \"mongors1n3\" }]})' | mongo"
    $ docker exec -it mongors2n1 bash -c "echo 'rs.initiate({_id : \"mongors2\", members: [{ _id : 0, host : \"mongors2n1\" },{ _id : 1, host : \"mongors2n2\" },{ _id : 2, host : \"mongors2n3\" }]})' | mongo"

    # register both shards through a router
    # (give the replica sets a few seconds to elect a primary first, otherwise addShard may fail)
    $ docker exec -it mongos1 bash -c "echo 'sh.addShard(\"mongors1/mongors1n1\")' | mongo "
    $ docker exec -it mongos1 bash -c "echo 'sh.addShard(\"mongors2/mongors2n1\")' | mongo "
                                            

DB Setup


    # Run every step through the router (mongos1), never directly against a shard member.
    # Each command is a separate shell session, so the database is named explicitly
    # instead of relying on a previous `use testDb`.

    # 1. enable sharding for the database (required before sharding a collection on MongoDB versions before 6.0)
    $ docker exec -it mongos1 bash -c "echo 'sh.enableSharding(\"testDb\")' | mongo "
    # 2. create testCollection inside testDb
    $ docker exec -it mongos1 bash -c "echo 'db.getSiblingDB(\"testDb\").createCollection(\"testCollection\")' | mongo "
    # 3. shard the collection on shardingField (ascending, ranged sharding)
    $ docker exec -it mongos1 bash -c "echo 'sh.shardCollection(\"testDb.testCollection\", {\"shardingField\" : 1})' | mongo "
                                            

Playing around


    # log in to mongos1 (router)
    $ docker exec -it mongos1 mongo

    # switch to the sharded database: the shell starts in "test", so without this
    # the db.testCollection calls below would target test.testCollection
    mongos> use testDb

    # force a chunk split at shardingField = 100
    mongos> sh.splitAt('testDb.testCollection', {shardingField: 100});
    # insert one document on each side of the split point (their output is shown below)
    mongos> db.testCollection.insertOne({shardingField: 55});
    mongos> db.testCollection.insertOne({shardingField: 155});
    
    # once the balancer has moved one chunk, each of the 2 shards stores one chunk holding 1 document
    mongos> db.testCollection.insertOne({shardingField: 55})
    {
    	"acknowledged" : true,
    	"insertedId" : ObjectId("61150783c8a0ff22b82d5ae6")
    }
    
    mongos> db.testCollection.insertOne({shardingField: 155})
    {
    	"acknowledged" : true,
    	"insertedId" : ObjectId("61150788c8a0ff22b82d5ae7")
    }
    
    mongos> db.testCollection.getShardDistribution();
    
    Shard mongors2 at mongors2/mongors2n1:27017,mongors2n2:27017,mongors2n3:27017
     data : 45B docs : 1 chunks : 1
     estimated data per chunk : 45B
     estimated docs per chunk : 1
    
    Shard mongors1 at mongors1/mongors1n1:27017,mongors1n2:27017,mongors1n3:27017
     data : 45B docs : 1 chunks : 1
     estimated data per chunk : 45B
     estimated docs per chunk : 1
    
    Totals
     data : 90B docs : 2 chunks : 2
     Shard mongors2 contains 50% data, 50% docs in cluster, avg obj size on shard : 45B
     Shard mongors1 contains 50% data, 50% docs in cluster, avg obj size on shard : 45B
    
    mongos> sh.status()
    --- Sharding Status --- 
      sharding version: {
      	"_id" : 1,
      	"minCompatibleVersion" : 5,
      	"currentVersion" : 6,
      	"clusterId" : ObjectId("6114bba388d545c510cfd551")
      }
      shards:
            {  "_id" : "mongors1",  "host" : "mongors1/mongors1n1:27017,mongors1n2:27017,mongors1n3:27017",  "state" : 1,  "topologyTime" : Timestamp(1628749545, 2) }
            {  "_id" : "mongors2",  "host" : "mongors2/mongors2n1:27017,mongors2n2:27017,mongors2n3:27017",  "state" : 1,  "topologyTime" : Timestamp(1628757106, 1) }
      ................
            {  "_id" : "testDb",  "primary" : "mongors1",  "partitioned" : true,  "version" : {  ...... }
                    testDb.testCollection
                            shard key: { "shardingField" : 1 }
                            unique: false
                            balancing: true
                            chunks:
                                    mongors1	1
                                    mongors2	1
                            { "shardingField" : { "$minKey" : 1 } } -->> { "shardingField" : 100 } on : mongors2 Timestamp(2, 0) 
                            { "shardingField" : 100 } -->> { "shardingField" : { "$maxKey" : 1 } } on : mongors1 Timestamp(2, 1)
    
    mongos> db.testCollection.find({});
    { "_id" : ObjectId("61150783c8a0ff22b82d5ae6"), "shardingField" : 55 }
    { "_id" : ObjectId("61150788c8a0ff22b82d5ae7"), "shardingField" : 155 }
    
    # connect to one replica to verify what is stored (not recommended, just for testing purposes)
    $ docker exec -it mongors1n1 mongo
    
    mongors1:PRIMARY> use testDb
    switched to db testDb
    mongors1:PRIMARY> db.testCollection.find({})
    { "_id" : ObjectId("61150788c8a0ff22b82d5ae7"), "shardingField" : 155 }
    
    # connect to the other replicaset
    $ docker exec -it mongors2n1 mongo
    
    mongors2:PRIMARY> use testDb
    switched to db testDb
    mongors2:PRIMARY> db.testCollection.find({})
    { "_id" : ObjectId("61150783c8a0ff22b82d5ae6"), "shardingField" : 55 }
                                            

Configuring Zones


    # associate both shards with the same zone, "testZone"
    mongos> sh.addShardToZone("mongors1", "testZone")
    mongos> sh.addShardToZone("mongors2", "testZone")
    # bind the shard-key range from shardingField 20 to 120 to "testZone";
    # the tagged range is listed at the end of the sh.status() output below
    mongos> sh.updateZoneKeyRange("testDb.testCollection", { shardingField: 20 }, { shardingField: 120 }, "testZone")
    mongos> sh.status()
    --- Sharding Status --- 
    ..................
    shards:
            {  "_id" : "mongors1",  "host" : "mongors1/mongors1n1:27017,mongors1n2:27017,mongors1n3:27017",  "state" : 1,  "topologyTime" : Timestamp(1628749545, 2),  "tags" : [ "testZone" ] }
            {  "_id" : "mongors2",  "host" : "mongors2/mongors2n1:27017,mongors2n2:27017,mongors2n3:27017",  "state" : 1,  "topologyTime" : Timestamp(1628757106, 1),  "tags" : [ "testZone" ] }
    .................
            databases:
    .................
            {  "_id" : "testDb",  "primary" : "mongors1",  "partitioned" : true,  "version" : {  "uuid" : UUID("f1635ad8-d9b4-4ce6-9e3f-cef945fac96a"),  "timestamp" : Timestamp(1628749617, 1),  "lastMod" : 1 } }
                    testDb.testCollection
                            shard key: { "shardingField" : 1 }
                            unique: false
                            balancing: true
                            chunks:
                                    mongors1	2
                                    mongors2	2
                            { "shardingField" : { "$minKey" : 1 } } -->> { "shardingField" : 20 } on : mongors2 Timestamp(2, 2) 
                            { "shardingField" : 20 } -->> { "shardingField" : 100 } on : mongors2 Timestamp(2, 3) 
                            { "shardingField" : 100 } -->> { "shardingField" : 120 } on : mongors1 Timestamp(2, 4) 
                            { "shardingField" : 120 } -->> { "shardingField" : { "$maxKey" : 1 } } on : mongors1 Timestamp(2, 5) 
                             tag: testZone  { "shardingField" : 20 } -->> { "shardingField" : 120 }