Don't stutter in send_diffs
When the master sees a burst of updates (perhaps sustained), the slaves' "I_HAVE" messages can fall behind the version we've already sent, and the unpatched code would retransmit already-sent diffs! This can result in substantial amplification (in a local test, 3000 ops turned into 427,000 ops). Though the number of *messages* sent was actually somewhat smaller, the ever-growing message size ultimately leads to failure.
This commit is contained in:

committed by
Nico Williams

parent
2709f28a1b
commit
7680c92047
@@ -129,6 +129,7 @@ struct slave {
|
|||||||
krb5_auth_context ac;
|
krb5_auth_context ac;
|
||||||
uint32_t version;
|
uint32_t version;
|
||||||
uint32_t version_tstamp;
|
uint32_t version_tstamp;
|
||||||
|
uint32_t version_ack;
|
||||||
time_t seen;
|
time_t seen;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
#define SLAVE_F_DEAD 0x1
|
#define SLAVE_F_DEAD 0x1
|
||||||
@@ -305,6 +306,7 @@ add_slave (krb5_context context, krb5_keytab keytab, slave **root,
|
|||||||
krb5_warnx (context, "connection from %s", s->name);
|
krb5_warnx (context, "connection from %s", s->name);
|
||||||
|
|
||||||
s->version = 0;
|
s->version = 0;
|
||||||
|
s->version_ack = 0;
|
||||||
s->flags = 0;
|
s->flags = 0;
|
||||||
slave_seen(s);
|
slave_seen(s);
|
||||||
s->next = *root;
|
s->next = *root;
|
||||||
@@ -698,7 +700,7 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
|
|||||||
krb5_warn(context, ret, "send_diffs: failed to send to slave");
|
krb5_warn(context, ret, "send_diffs: failed to send to slave");
|
||||||
slave_dead(context, s);
|
slave_dead(context, s);
|
||||||
}
|
}
|
||||||
krb5_warnx(context, "slave %s in sync already at version %ld",
|
krb5_warnx(context, "slave %s version %ld already sent",
|
||||||
s->name, (long)s->version);
|
s->name, (long)s->version);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@@ -903,8 +905,8 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
|
|||||||
/* new started slave that have old log */
|
/* new started slave that have old log */
|
||||||
if (s->version == 0 && tmp != 0) {
|
if (s->version == 0 && tmp != 0) {
|
||||||
if (current_version < tmp) {
|
if (current_version < tmp) {
|
||||||
krb5_warnx(context, "Slave %s (version %u) have later version "
|
krb5_warnx(context, "Slave %s (version %u) has later version "
|
||||||
"the master (version %u) OUT OF SYNC",
|
"than the master (version %u) OUT OF SYNC",
|
||||||
s->name, tmp, current_version);
|
s->name, tmp, current_version);
|
||||||
}
|
}
|
||||||
if (verbose)
|
if (verbose)
|
||||||
@@ -912,11 +914,8 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
|
|||||||
s->name, s->version, tmp);
|
s->name, s->version, tmp);
|
||||||
s->version = tmp;
|
s->version = tmp;
|
||||||
}
|
}
|
||||||
if (tmp < s->version) {
|
if ((s->version_ack = tmp) < s->version)
|
||||||
krb5_warnx(context, "Slave %s claims to not have "
|
break;
|
||||||
"version we already sent to it", s->name);
|
|
||||||
s->version = tmp;
|
|
||||||
}
|
|
||||||
ret = send_diffs(server_context, s, log_fd, database, current_version,
|
ret = send_diffs(server_context, s, log_fd, database, current_version,
|
||||||
current_tstamp);
|
current_tstamp);
|
||||||
break;
|
break;
|
||||||
@@ -1031,7 +1030,7 @@ write_stats(krb5_context context, slave *slaves, uint32_t current_version)
|
|||||||
} else
|
} else
|
||||||
rtbl_add_column_entry(tbl, SLAVE_ADDRESS, "<unknown>");
|
rtbl_add_column_entry(tbl, SLAVE_ADDRESS, "<unknown>");
|
||||||
|
|
||||||
snprintf(str, sizeof(str), "%u", (unsigned)slaves->version);
|
snprintf(str, sizeof(str), "%u", (unsigned)slaves->version_ack);
|
||||||
rtbl_add_column_entry(tbl, SLAVE_VERSION, str);
|
rtbl_add_column_entry(tbl, SLAVE_VERSION, str);
|
||||||
|
|
||||||
if (slaves->flags & SLAVE_F_DEAD)
|
if (slaves->flags & SLAVE_F_DEAD)
|
||||||
|
Reference in New Issue
Block a user