Steps:
- Form a working cluster of 10 nodes with 5 masters and 5 slaves.
- Kill the master with the highest epoch and its corresponding slave (e.g. the :7008 master and the :7002 slave).
- How can the cluster be fixed if the :7008 and :7002 nodes are not recoverable? (I am fine with losing the data in the slots served by the crashed master.)
I tried the following options:
- redis-trib.rb fix - getting "ERR Slot 13108 is already busy" (see the output below)
- Run cluster forget for the node IDs of the crashed master and slave, then execute redis-trib.rb fix - getting "[ERR] Nodes don't agree about configuration!"
I think neither of the above options works because cluster_current_epoch is greater than the cluster_my_epoch of every running node.
How can I fix the Redis cluster state?
Output:
Working Cluster:
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7000 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7001 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7002 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:10
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7003 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7004 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7005 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7006 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7007 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7008 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:10
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7009 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb check 127.0.0.1:7000
>>> Performing Cluster Check (using node 127.0.0.1:7000)
M: 1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000
slots:2185-5460 (3276 slots) master
1 additional replica(s)
S: 07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006
slots: (0 slots) slave
replicates 9aae5952143d1c54b8be887f7f144b55ac491b08
M: b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008
slots:13108-16383 (3276 slots) master
1 additional replica(s)
S: 8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002
slots: (0 slots) slave
replicates b29bfac95b729e0cf8304a2d0479941818a05c15
M: a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004
slots:0-2184,5461,6555-7646 (3278 slots) master
1 additional replica(s)
S: fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009
slots: (0 slots) slave
replicates 380920c0b693f2a955ec0f7226cc2df187c82dd7
M: 9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001
slots:7647-10922 (3276 slots) master
1 additional replica(s)
S: b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005
slots: (0 slots) slave
replicates 1f2d3ef5b4597b460080506a2df96447dec0e0c0
M: 380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003
slots:5462-6554,10923-13107 (3278 slots) master
1 additional replica(s)
S: dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007
slots: (0 slots) slave
replicates a2dbd1143d7e354bc48f84f11c738954582a73fb
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb info 127.0.0.1:7000
127.0.0.1:7000 (1f2d3ef5...) -> 5979 keys | 3276 slots | 1 slaves.
127.0.0.1:7008 (b29bfac9...) -> 6009 keys | 3276 slots | 1 slaves.
127.0.0.1:7004 (a2dbd114...) -> 6006 keys | 3278 slots | 1 slaves.
127.0.0.1:7001 (9aae5952...) -> 6003 keys | 3276 slots | 1 slaves.
127.0.0.1:7003 (380920c0...) -> 6003 keys | 3278 slots | 1 slaves.
[OK] 30000 keys in 5 masters.
1.83 keys per slot on average.
root@9b1f373fcb81:/redis/redis_work#
Killed the :7008 and :7002 Redis nodes:
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb info 127.0.0.1:7000
127.0.0.1:7000 (1f2d3ef5...) -> 5979 keys | 3276 slots | 1 slaves.
127.0.0.1:7004 (a2dbd114...) -> 6006 keys | 3278 slots | 1 slaves.
127.0.0.1:7001 (9aae5952...) -> 6003 keys | 3276 slots | 1 slaves.
127.0.0.1:7003 (380920c0...) -> 6003 keys | 3278 slots | 1 slaves.
[OK] 23991 keys in 4 masters.
1.46 keys per slot on average.
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb check 127.0.0.1:7000
>>> Performing Cluster Check (using node 127.0.0.1:7000)
M: 1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000
slots:2185-5460 (3276 slots) master
1 additional replica(s)
S: 07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006
slots: (0 slots) slave
replicates 9aae5952143d1c54b8be887f7f144b55ac491b08
M: a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004
slots:0-2184,5461,6555-7646 (3278 slots) master
1 additional replica(s)
S: fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009
slots: (0 slots) slave
replicates 380920c0b693f2a955ec0f7226cc2df187c82dd7
M: 9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001
slots:7647-10922 (3276 slots) master
1 additional replica(s)
S: b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005
slots: (0 slots) slave
replicates 1f2d3ef5b4597b460080506a2df96447dec0e0c0
M: 380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003
slots:5462-6554,10923-13107 (3278 slots) master
1 additional replica(s)
S: dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007
slots: (0 slots) slave
replicates a2dbd1143d7e354bc48f84f11c738954582a73fb
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[ERR] Not all 16384 slots are covered by nodes.
Trying to fix with the add-slots option via redis-trib:
/redis/redis-4.0.2/src/redis-trib.rb fix 127.0.0.1:7000
...blah_missing_slot blah_missing_slot blah_missing_slot
...blah_missing_slot blah_missing_slot blah_missing_slot
58,16359,16360,16361,16362,16363,16364,16365,16366,16367,16368,16369,16370,16371,16372,16373,16374,16375,16376,16377,16378,16379,16380,16381,16382,16383
Fix these slots by covering with a random node? (type 'yes' to accept): yes
>>> Covering slot 13108 with 127.0.0.1:7003
/usr/local/bundle/gems/redis-3.3.3/lib/redis/client.rb:121:in `call': ERR Slot 13108 is already busy (Redis::CommandError)
from /usr/local/bundle/gems/redis-3.3.3/lib/redis.rb:2705:in `block in method_missing'
from /usr/local/bundle/gems/redis-3.3.3/lib/redis.rb:58:in `block in synchronize'
from /usr/local/lib/ruby/2.4.0/monitor.rb:214:in `mon_synchronize'
from /usr/local/bundle/gems/redis-3.3.3/lib/redis.rb:58:in `synchronize'
from /usr/local/bundle/gems/redis-3.3.3/lib/redis.rb:2704:in `method_missing'
from /redis/redis-4.0.2/src/redis-trib.rb:463:in `block in fix_slots_coverage'
from /redis/redis-4.0.2/src/redis-trib.rb:460:in `each'
from /redis/redis-4.0.2/src/redis-trib.rb:460:in `fix_slots_coverage'
from /redis/redis-4.0.2/src/redis-trib.rb:398:in `check_slots_coverage'
from /redis/redis-4.0.2/src/redis-trib.rb:361:in `check_cluster'
from /redis/redis-4.0.2/src/redis-trib.rb:1140:in `fix_cluster_cmd'
from /redis/redis-4.0.2/src/redis-trib.rb:1700:in `<main>'
root@9b1f373fcb81:/redis/redis_work#
None of the running Redis nodes has a "cluster_my_epoch" equal to "cluster_current_epoch":
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7000 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7001 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7002 cluster info | grep epoch
Could not connect to Redis at 127.0.0.1:7002: Connection refused
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7003 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7004 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7005 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7006 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7007 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7008 cluster info | grep epoch
Could not connect to Redis at 127.0.0.1:7008: Connection refused
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7009 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work#
Cluster Nodes
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb call 127.0.0.1:7000 cluster nodes
>>> Calling CLUSTER nodes
127.0.0.1:7000: 07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143161 2 connected
b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542267 1509428541654 10 disconnected 13108-16383
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428539208 10 disconnected
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429144596 7 connected 0-2184 5461 6555-7646
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429144000 6 connected
1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 myself,master - 0 1509429144000 1 connected 2185-5460
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429144184 2 connected 7647-10922
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143572 1 connected
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429144697 6 connected 5462-6554 10923-13107
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429145206 7 connected
127.0.0.1:7006: b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542267 1509428540000 10 disconnected 13108-16383
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429145005 7 connected 0-2184 5461 6555-7646
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429144492 2 connected 7647-10922
07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 myself,slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143000 0 connected
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429145106 6 connected 5462-6554 10923-13107
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428540526 10 disconnected
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143571 1 connected
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429144000 6 connected
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429144000 7 connected
1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429145106 1 connected 2185-5460
127.0.0.1:7004: 1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429143000 1 connected 2185-5460
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429144596 6 connected
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 myself,master - 0 1509429142000 7 connected 0-2184 5461 6555-7646
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429144595 7 connected
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429144698 2 connected 7647-10922
07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143571 2 connected
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428539000 10 disconnected
b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542267 1509428541000 10 disconnected 13108-16383
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429144000 6 connected 5462-6554 10923-13107
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143571 1 connected
127.0.0.1:7009: dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429144596 7 connected
07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143161 2 connected
b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542267 1509428541000 10 disconnected 13108-16383
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143161 1 connected
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 myself,slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429144000 0 connected
1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429144000 1 connected 2185-5460
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428539105 10 disconnected
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429145106 2 connected 7647-10922
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429145206 6 connected 5462-6554 10923-13107
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429145000 7 connected 0-2184 5461 6555-7646
127.0.0.1:7001: 07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143876 2 connected
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429144000 7 connected 0-2184 5461 6555-7646
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143571 1 connected
b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542268 1509428541000 10 disconnected 13108-16383
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429145106 6 connected
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428540000 10 disconnected
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 myself,master - 0 1509429142000 2 connected 7647-10922
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429144596 6 connected 5462-6554 10923-13107
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429144000 7 connected
1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429144386 1 connected 2185-5460
127.0.0.1:7005: fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429144596 6 connected
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429145106 2 connected 7647-10922
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 myself,slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143000 0 connected
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429144000 7 connected 0-2184 5461 6555-7646
b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542267 1509428540000 10 disconnected 13108-16383
07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143572 2 connected
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429143161 7 connected
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429143571 6 connected 5462-6554 10923-13107
1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429143000 1 connected 2185-5460
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428539000 10 disconnected
127.0.0.1:7003: b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542268 1509428541554 10 disconnected 13108-16383
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429145106 6 connected
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143571 1 connected
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428539000 10 disconnected
1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429144000 1 connected 2185-5460
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 myself,master - 0 1509429144000 6 connected 5462-6554 10923-13107
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429144000 7 connected
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429144000 2 connected 7647-10922
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429145000 7 connected 0-2184 5461 6555-7646
07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429143673 2 connected
127.0.0.1:7007: 1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000@17000 master - 0 1509429145000 1 connected 2185-5460
a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004@17004 master - 0 1509429145106 7 connected 0-2184 5461 6555-7646
b29bfac95b729e0cf8304a2d0479941818a05c15 127.0.0.1:7008@17008 master,fail - 1509428542267 1509428541758 10 disconnected 13108-16383
fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009@17009 slave 380920c0b693f2a955ec0f7226cc2df187c82dd7 0 1509429144285 6 connected
9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001@17001 master - 0 1509429144000 2 connected 7647-10922
380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003@17003 master - 0 1509429145000 6 connected 5462-6554 10923-13107
b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005@17005 slave 1f2d3ef5b4597b460080506a2df96447dec0e0c0 0 1509429143263 1 connected
dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007@17007 myself,slave a2dbd1143d7e354bc48f84f11c738954582a73fb 0 1509429144000 0 connected
07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006@17006 slave 9aae5952143d1c54b8be887f7f144b55ac491b08 0 1509429145307 2 connected
8904962c822402a8d8e810b44d4ca7e847a573e8 127.0.0.1:7002@17002 slave,fail b29bfac95b729e0cf8304a2d0479941818a05c15 1509428540940 1509428539000 10 disconnected
root@9b1f373fcb81:/redis/redis_work#
Forgetting the failed master and slave on every running node:
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb call 127.0.0.1:7000 cluster forget b29bfac95b729e0cf8304a2d0479941818a05c15
>>> Calling CLUSTER forget b29bfac95b729e0cf8304a2d0479941818a05c15
127.0.0.1:7000: OK
127.0.0.1:7006: OK
127.0.0.1:7004: OK
127.0.0.1:7009: OK
127.0.0.1:7001: OK
127.0.0.1:7005: OK
127.0.0.1:7003: OK
127.0.0.1:7007: OK
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb call 127.0.0.1:7000 cluster forget 8904962c822402a8d8e810b44d4ca7e847a573e8
>>> Calling CLUSTER forget 8904962c822402a8d8e810b44d4ca7e847a573e8
127.0.0.1:7000: OK
127.0.0.1:7006: OK
127.0.0.1:7004: OK
127.0.0.1:7009: OK
127.0.0.1:7001: OK
127.0.0.1:7005: OK
127.0.0.1:7003: OK
127.0.0.1:7007: OK
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7000 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7001 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7002 cluster info | grep epoch
Could not connect to Redis at 127.0.0.1:7002: Connection refused
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7003 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7004 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7005 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7006 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7007 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7008 cluster info | grep epoch
Could not connect to Redis at 127.0.0.1:7008: Connection refused
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7009 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb check 127.0.0.1:7000
>>> Performing Cluster Check (using node 127.0.0.1:7000)
M: 1f2d3ef5b4597b460080506a2df96447dec0e0c0 127.0.0.1:7000
slots:2185-5460 (3276 slots) master
1 additional replica(s)
S: 07a4e205e7116691a568fbd6ded69c61f3dc4bdf 127.0.0.1:7006
slots: (0 slots) slave
replicates 9aae5952143d1c54b8be887f7f144b55ac491b08
M: a2dbd1143d7e354bc48f84f11c738954582a73fb 127.0.0.1:7004
slots:0-2184,5461,6555-7646 (3278 slots) master
1 additional replica(s)
S: fad0764c5bcb13b6154402c0a30bea4d541a54eb 127.0.0.1:7009
slots: (0 slots) slave
replicates 380920c0b693f2a955ec0f7226cc2df187c82dd7
M: 9aae5952143d1c54b8be887f7f144b55ac491b08 127.0.0.1:7001
slots:7647-10922 (3276 slots) master
1 additional replica(s)
S: b1dfb1eed811b84de7f2e770dcea39c5bca23ceb 127.0.0.1:7005
slots: (0 slots) slave
replicates 1f2d3ef5b4597b460080506a2df96447dec0e0c0
M: 380920c0b693f2a955ec0f7226cc2df187c82dd7 127.0.0.1:7003
slots:5462-6554,10923-13107 (3278 slots) master
1 additional replica(s)
S: dfa867e8b313ca93f525cb44fe97df8c1c5260f0 127.0.0.1:7007
slots: (0 slots) slave
replicates a2dbd1143d7e354bc48f84f11c738954582a73fb
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[ERR] Not all 16384 slots are covered by nodes.
Trying to fix again using redis-trib.rb:
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb fix 127.0.0.1:7000
...
...
<all missing slots are added now>
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-trib.rb check 127.0.0.1:7000
...
...
[ERR] Nodes don't agree about configuration!
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7000 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7001 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7002 cluster info | grep epoch
Could not connect to Redis at 127.0.0.1:7002: Connection refused
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7003 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7004 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7005 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:1
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7006 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:2
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7007 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:7
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7008 cluster info | grep epoch
Could not connect to Redis at 127.0.0.1:7008: Connection refused
root@9b1f373fcb81:/redis/redis_work# /redis/redis-4.0.2/src/redis-cli -p 7009 cluster info | grep epoch
cluster_current_epoch:10
cluster_my_epoch:6
root@9b1f373fcb81:/redis/redis_work#