krb5_sendto_kdc: failover for multiple AAAA/A RRs on one domain

We found that the libraries behaviour when dealing with domains with
more than one entry in them is slightly suboptimal.  The situation
was

kdc1		IN	A	1.2.3.4
kdc1		IN	AAAA	ff02::1

I.e. a single hostmame with both IPv6 and IPv4 addresses.  When we
run krb5_sendto_kdc on a box with only IPv4 addresses, there is a
3s delay before it fails back to the IPv4 address.  This is because
the library sets the 2nd address on each hostname to be 3s in the
future and each additional one another 3s.

We change wait_response() s.t. if one is able to make progress, we
iterate over the list of hosts and move them all 1s forward.  We
also modify submit_request() to skip hosts if host_connect() fails.
This commit is contained in:
Roland C. Dowdeswell
2019-05-10 19:13:26 +01:00
committed by Jeffrey Altman
parent 5ca229e0d9
commit d497d7e4a7

View File

@@ -329,6 +329,7 @@ static void
debug_host(krb5_context context, int level, struct host *host, const char *fmt, ...) debug_host(krb5_context context, int level, struct host *host, const char *fmt, ...)
{ {
const char *proto = "unknown"; const char *proto = "unknown";
const char *state;
char name[NI_MAXHOST], port[NI_MAXSERV]; char name[NI_MAXHOST], port[NI_MAXSERV];
char *text = NULL; char *text = NULL;
va_list ap; va_list ap;
@@ -354,8 +355,17 @@ debug_host(krb5_context context, int level, struct host *host, const char *fmt,
name, sizeof(name), port, sizeof(port), NI_NUMERICHOST) != 0) name, sizeof(name), port, sizeof(port), NI_NUMERICHOST) != 0)
name[0] = '\0'; name[0] = '\0';
_krb5_debug(context, level, "%s: %s %s:%s (%s) tid: %08x", text, switch (host->state) {
proto, name, port, host->hi->hostname, host->tid); case CONNECT: state = "CONNECT"; break;
case CONNECTING: state = "CONNECTING"; break;
case CONNECTED: state = "CONNECTED"; break;
case WAITING_REPLY: state = "WAITING_REPLY"; break;
case DEAD: state = "DEAD"; break;
default: state = "unknown"; break;
}
_krb5_debug(context, level, "%s: %s %s:%s (%s) state=%s tid: %08x", text,
proto, name, port, host->hi->hostname, state, host->tid);
free(text); free(text);
} }
@@ -896,11 +906,18 @@ submit_request(krb5_context context, krb5_sendto_ctx ctx, krb5_krbhst_info *hi)
host->tries = host->fun->ntries; host->tries = host->fun->ntries;
/* /*
* Connect directly next host, wait a host_timeout for each next address * Connect directly next host, wait a host_timeout for each next address.
* We try host_connect() here, checking the return code because as we do
* non-blocking connects, any error here indicates that the address is just
* offline. That is, it's something like "No route to host" which is not
* worth retrying. And so, we fail directly and immediately to the next
* address for this host without enqueueing the address for retries.
*/ */
if (submitted_host == 0) if (submitted_host == 0) {
host_connect(context, ctx, host); host_connect(context, ctx, host);
else { if (host->state == DEAD)
continue;
} else {
debug_host(context, 5, host, debug_host(context, 5, host,
"Queuing host in future (in %ds), its the %lu address on the same name", "Queuing host in future (in %ds), its the %lu address on the same name",
(int)(context->host_timeout * submitted_host), submitted_host + 1); (int)(context->host_timeout * submitted_host), submitted_host + 1);
@@ -908,16 +925,14 @@ submit_request(krb5_context context, krb5_sendto_ctx ctx, krb5_krbhst_info *hi)
} }
heim_array_append_value(ctx->hosts, host); heim_array_append_value(ctx->hosts, host);
heim_release(host); heim_release(host);
submitted_host++; submitted_host++;
} }
if (freeai) if (freeai)
freeaddrinfo(ai); freeaddrinfo(ai);
if (!submitted_host) if (submitted_host == 0)
return KRB5_KDC_UNREACH; return KRB5_KDC_UNREACH;
return 0; return 0;
@@ -928,7 +943,7 @@ struct wait_ctx {
krb5_sendto_ctx ctx; krb5_sendto_ctx ctx;
fd_set rfds; fd_set rfds;
fd_set wfds; fd_set wfds;
unsigned max_fd; int max_fd;
int got_reply; int got_reply;
time_t timenow; time_t timenow;
}; };
@@ -939,16 +954,16 @@ wait_setup(heim_object_t obj, void *iter_ctx, int *stop)
struct wait_ctx *wait_ctx = iter_ctx; struct wait_ctx *wait_ctx = iter_ctx;
struct host *h = (struct host *)obj; struct host *h = (struct host *)obj;
if (h->state == CONNECT) {
if (h->timeout >= wait_ctx->timenow)
return;
host_connect(wait_ctx->context, wait_ctx->ctx, h);
}
/* skip dead hosts */ /* skip dead hosts */
if (h->state == DEAD) if (h->state == DEAD)
return; return;
if (h->state == CONNECT) {
if (h->timeout < wait_ctx->timenow)
host_connect(wait_ctx->context, wait_ctx->ctx, h);
return;
}
/* if host timed out, dec tries and (retry or kill host) */ /* if host timed out, dec tries and (retry or kill host) */
if (h->timeout < wait_ctx->timenow) { if (h->timeout < wait_ctx->timenow) {
heim_assert(h->tries != 0, "tries should not reach 0"); heim_assert(h->tries != 0, "tries should not reach 0");
@@ -976,6 +991,7 @@ wait_setup(heim_object_t obj, void *iter_ctx, int *stop)
FD_SET(h->fd, &wait_ctx->wfds); FD_SET(h->fd, &wait_ctx->wfds);
break; break;
default: default:
debug_host(wait_ctx->context, 5, h, "invalid sendto host state");
heim_abort("invalid sendto host state"); heim_abort("invalid sendto host state");
} }
if (h->fd > wait_ctx->max_fd) if (h->fd > wait_ctx->max_fd)
@@ -989,6 +1005,15 @@ wait_filter_dead(heim_object_t obj, void *ctx)
return (int)((h->state == DEAD) ? true : false); return (int)((h->state == DEAD) ? true : false);
} }
static void
wait_accelerate(heim_object_t obj, void *ctx, int *stop)
{
struct host *h = (struct host *)obj;
if (h->state == CONNECT && h->timeout > 0)
h->timeout--;
}
static void static void
wait_process(heim_object_t obj, void *ctx, int *stop) wait_process(heim_object_t obj, void *ctx, int *stop)
{ {
@@ -1022,7 +1047,7 @@ wait_response(krb5_context context, int *action, krb5_sendto_ctx ctx)
wait_ctx.ctx = ctx; wait_ctx.ctx = ctx;
FD_ZERO(&wait_ctx.rfds); FD_ZERO(&wait_ctx.rfds);
FD_ZERO(&wait_ctx.wfds); FD_ZERO(&wait_ctx.wfds);
wait_ctx.max_fd = 0; wait_ctx.max_fd = -1;
/* oh, we have a reply, it must be a plugin that got it for us */ /* oh, we have a reply, it must be a plugin that got it for us */
if (ctx->response.length) { if (ctx->response.length) {
@@ -1048,6 +1073,17 @@ wait_response(krb5_context context, int *action, krb5_sendto_ctx ctx)
return 0; return 0;
} }
if (wait_ctx.max_fd == -1) {
/*
* If we don't find a host which can make progress, then
* we accelerate the process by moving all of the contestants
* up by 1s.
*/
_krb5_debug(context, 5, "wait_response: moving the contestants forward");
heim_array_iterate_f(ctx->hosts, &wait_ctx, wait_accelerate);
return 0;
}
tv.tv_sec = 1; tv.tv_sec = 1;
tv.tv_usec = 0; tv.tv_usec = 0;