Cluster: some bias towards FAIL/PFAIL nodes in gossip sections.

This improves the PFAIL -> FAIL switch. It is too late at this point in
the RC releases to add a proper, separate PFAIL/FAIL dictionary to do this
in a less randomized way. Experiments confirm that this helps in practice:
with 20 nodes and node-timeout set to 5 seconds, the PFAIL -> FAIL switch
takes 2.5 seconds on average without this commit, and 1 second with this
commit.
This commit is contained in:
antirez 2015-01-30 11:54:18 +01:00
parent 69b4f00d28
commit 233729fe7f

View File

@ -2158,7 +2158,7 @@ void clusterSendPing(clusterLink *link, int type) {
clusterBuildMessageHdr(hdr,type);
/* Populate the gossip fields */
int maxiterations = wanted*2;
int maxiterations = wanted*3;
while(freshnodes > 0 && gossipcount < wanted && maxiterations--) {
dictEntry *de = dictGetRandomKey(server.cluster->nodes);
clusterNode *this = dictGetVal(de);
@ -2169,6 +2169,11 @@ void clusterSendPing(clusterLink *link, int type) {
* already, so we just gossip about other nodes. */
if (this == myself) continue;
/* Give a bias to FAIL/PFAIL nodes. */
if (maxiterations > wanted*2 &&
!(this->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL)))
continue;
/* In the gossip section don't include:
* 1) Nodes in HANDSHAKE state.
* 3) Nodes with the NOADDR flag set.
@ -2201,8 +2206,6 @@ void clusterSendPing(clusterLink *link, int type) {
gossip->notused2 = 0;
gossipcount++;
}
redisLog(REDIS_VERBOSE,"WANTED: %d, USED_ITER: %d, GOSSIPCOUNT: %d",
wanted, wanted*2-maxiterations, gossipcount);
/* Ready to send... fix the totlen field and queue the message in the
* output buffer. */