Don't stutter in send_diffs
When the master sees a burst of updates (perhaps sustained), the slaves "I_HAVE" messages can fall behind the version we've already sent, and the unpatched code would retransmit already sent diffs! This can result in substantial amplification (in a local test, 3000 ops turned into 427,000 ops). Though the number of *messages* sent was actually somewhat smaller, the ever growing message size ultimately leads to failure.
This commit is contained in:

committed by
Nico Williams

parent
2709f28a1b
commit
7680c92047
@@ -129,6 +129,7 @@ struct slave {
|
||||
krb5_auth_context ac;
|
||||
uint32_t version;
|
||||
uint32_t version_tstamp;
|
||||
uint32_t version_ack;
|
||||
time_t seen;
|
||||
unsigned long flags;
|
||||
#define SLAVE_F_DEAD 0x1
|
||||
@@ -305,6 +306,7 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root,
|
||||
krb5_warnx (context, "connection from %s", s->name);
|
||||
|
||||
s->version = 0;
|
||||
s->version_ack = 0;
|
||||
s->flags = 0;
|
||||
slave_seen(s);
|
||||
s->next = *root;
|
||||
@@ -698,7 +700,7 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
|
||||
krb5_warn(context, ret, "send_diffs: failed to send to slave");
|
||||
slave_dead(context, s);
|
||||
}
|
||||
krb5_warnx(context, "slave %s in sync already at version %ld",
|
||||
krb5_warnx(context, "slave %s version %ld already sent",
|
||||
s->name, (long)s->version);
|
||||
}
|
||||
return ret;
|
||||
@@ -903,8 +905,8 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
|
||||
/* new started slave that have old log */
|
||||
if (s->version == 0 && tmp != 0) {
|
||||
if (current_version < tmp) {
|
||||
krb5_warnx(context, "Slave %s (version %u) have later version "
|
||||
"the master (version %u) OUT OF SYNC",
|
||||
krb5_warnx(context, "Slave %s (version %u) has later version "
|
||||
"than the master (version %u) OUT OF SYNC",
|
||||
s->name, tmp, current_version);
|
||||
}
|
||||
if (verbose)
|
||||
@@ -912,11 +914,8 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
|
||||
s->name, s->version, tmp);
|
||||
s->version = tmp;
|
||||
}
|
||||
if (tmp < s->version) {
|
||||
krb5_warnx(context, "Slave %s claims to not have "
|
||||
"version we already sent to it", s->name);
|
||||
s->version = tmp;
|
||||
}
|
||||
if ((s->version_ack = tmp) < s->version)
|
||||
break;
|
||||
ret = send_diffs(server_context, s, log_fd, database, current_version,
|
||||
current_tstamp);
|
||||
break;
|
||||
@@ -1031,7 +1030,7 @@ write_stats(krb5_context context, slave *slaves, uint32_t current_version)
|
||||
} else
|
||||
rtbl_add_column_entry(tbl, SLAVE_ADDRESS, "<unknown>");
|
||||
|
||||
snprintf(str, sizeof(str), "%u", (unsigned)slaves->version);
|
||||
snprintf(str, sizeof(str), "%u", (unsigned)slaves->version_ack);
|
||||
rtbl_add_column_entry(tbl, SLAVE_VERSION, str);
|
||||
|
||||
if (slaves->flags & SLAVE_F_DEAD)
|
||||
|
Reference in New Issue
Block a user