krb5_sendto_kdc: failover for multiple AAAA/A RRs on one domain
We found that the libraries behaviour when dealing with domains with more than one entry in them is slightly suboptimal. The situation was kdc1 IN A 1.2.3.4 kdc1 IN AAAA ff02::1 I.e. a single hostmame with both IPv6 and IPv4 addresses. When we run krb5_sendto_kdc on a box with only IPv4 addresses, there is a 3s delay before it fails back to the IPv4 address. This is because the library sets the 2nd address on each hostname to be 3s in the future and each additional one another 3s. We change wait_response() s.t. if one is able to make progress, we iterate over the list of hosts and move them all 1s forward. We also modify submit_request() to skip hosts if host_connect() fails.
This commit is contained in:
		 Roland C. Dowdeswell
					Roland C. Dowdeswell
				
			
				
					committed by
					
						 Jeffrey Altman
						Jeffrey Altman
					
				
			
			
				
	
			
			
			 Jeffrey Altman
						Jeffrey Altman
					
				
			
						parent
						
							5ca229e0d9
						
					
				
				
					commit
					d497d7e4a7
				
			| @@ -329,6 +329,7 @@ static void | ||||
| debug_host(krb5_context context, int level, struct host *host, const char *fmt, ...) | ||||
| { | ||||
|     const char *proto = "unknown"; | ||||
|     const char *state; | ||||
|     char name[NI_MAXHOST], port[NI_MAXSERV]; | ||||
|     char *text = NULL; | ||||
|     va_list ap; | ||||
| @@ -354,8 +355,17 @@ debug_host(krb5_context context, int level, struct host *host, const char *fmt, | ||||
| 		    name, sizeof(name), port, sizeof(port), NI_NUMERICHOST) != 0) | ||||
| 	name[0] = '\0'; | ||||
|  | ||||
|     _krb5_debug(context, level, "%s: %s %s:%s (%s) tid: %08x", text, | ||||
| 		proto, name, port, host->hi->hostname, host->tid); | ||||
|     switch (host->state) { | ||||
|     case CONNECT:	state = "CONNECT";		break; | ||||
|     case CONNECTING:	state = "CONNECTING";		break; | ||||
|     case CONNECTED:	state = "CONNECTED";		break; | ||||
|     case WAITING_REPLY:	state = "WAITING_REPLY";	break; | ||||
|     case DEAD:		state = "DEAD";			break; | ||||
|     default:		state = "unknown";		break; | ||||
|     } | ||||
|  | ||||
|     _krb5_debug(context, level, "%s: %s %s:%s (%s) state=%s tid: %08x", text, | ||||
| 		proto, name, port, host->hi->hostname, state, host->tid); | ||||
|     free(text); | ||||
| } | ||||
|  | ||||
| @@ -896,11 +906,18 @@ submit_request(krb5_context context, krb5_sendto_ctx ctx, krb5_krbhst_info *hi) | ||||
| 	host->tries = host->fun->ntries; | ||||
|  | ||||
| 	/* | ||||
| 	 * Connect directly next host, wait a host_timeout for each next address | ||||
| 	 * Connect directly next host, wait a host_timeout for each next address. | ||||
| 	 * We try host_connect() here, checking the return code because as we do | ||||
| 	 * non-blocking connects, any error here indicates that the address is just | ||||
| 	 * offline.  That is, it's something like "No route to host" which is not | ||||
| 	 * worth retrying.  And so, we fail directly and immediately to the next | ||||
| 	 * address for this host without enqueueing the address for retries. | ||||
| 	 */ | ||||
| 	if (submitted_host == 0) | ||||
| 	if (submitted_host == 0) { | ||||
| 	    host_connect(context, ctx, host); | ||||
| 	else { | ||||
| 	    if (host->state == DEAD) | ||||
| 		continue; | ||||
| 	} else { | ||||
| 	    debug_host(context, 5, host, | ||||
| 		       "Queuing host in future (in %ds), its the %lu address on the same name", | ||||
| 		       (int)(context->host_timeout * submitted_host), submitted_host + 1); | ||||
| @@ -908,16 +925,14 @@ submit_request(krb5_context context, krb5_sendto_ctx ctx, krb5_krbhst_info *hi) | ||||
| 	} | ||||
|  | ||||
| 	heim_array_append_value(ctx->hosts, host); | ||||
|  | ||||
| 	heim_release(host); | ||||
|  | ||||
| 	submitted_host++; | ||||
|     } | ||||
|  | ||||
|     if (freeai) | ||||
| 	freeaddrinfo(ai); | ||||
|  | ||||
|     if (!submitted_host) | ||||
|     if (submitted_host == 0) | ||||
| 	return KRB5_KDC_UNREACH; | ||||
|  | ||||
|     return 0; | ||||
| @@ -928,7 +943,7 @@ struct wait_ctx { | ||||
|     krb5_sendto_ctx ctx; | ||||
|     fd_set rfds; | ||||
|     fd_set wfds; | ||||
|     unsigned max_fd; | ||||
|     int max_fd; | ||||
|     int got_reply; | ||||
|     time_t timenow; | ||||
| }; | ||||
| @@ -939,16 +954,16 @@ wait_setup(heim_object_t obj, void *iter_ctx, int *stop) | ||||
|     struct wait_ctx *wait_ctx = iter_ctx; | ||||
|     struct host *h = (struct host *)obj; | ||||
|  | ||||
|     if (h->state == CONNECT) { | ||||
| 	if (h->timeout >= wait_ctx->timenow) | ||||
| 	    return; | ||||
| 	host_connect(wait_ctx->context, wait_ctx->ctx, h); | ||||
|     } | ||||
|  | ||||
|     /* skip dead hosts */ | ||||
|     if (h->state == DEAD) | ||||
| 	return; | ||||
|  | ||||
|     if (h->state == CONNECT) { | ||||
| 	if (h->timeout < wait_ctx->timenow) | ||||
| 	    host_connect(wait_ctx->context, wait_ctx->ctx, h); | ||||
| 	return; | ||||
|     } | ||||
|  | ||||
|     /* if host timed out, dec tries and (retry or kill host) */ | ||||
|     if (h->timeout < wait_ctx->timenow) { | ||||
| 	heim_assert(h->tries != 0, "tries should not reach 0"); | ||||
| @@ -976,6 +991,7 @@ wait_setup(heim_object_t obj, void *iter_ctx, int *stop) | ||||
| 	FD_SET(h->fd, &wait_ctx->wfds); | ||||
| 	break; | ||||
|     default: | ||||
| 	debug_host(wait_ctx->context, 5, h, "invalid sendto host state"); | ||||
| 	heim_abort("invalid sendto host state"); | ||||
|     } | ||||
|     if (h->fd > wait_ctx->max_fd) | ||||
| @@ -989,6 +1005,15 @@ wait_filter_dead(heim_object_t obj, void *ctx) | ||||
|     return (int)((h->state == DEAD) ? true : false); | ||||
| } | ||||
|  | ||||
| static void | ||||
| wait_accelerate(heim_object_t obj, void *ctx, int *stop) | ||||
| { | ||||
|     struct host *h = (struct host *)obj; | ||||
|  | ||||
|     if (h->state == CONNECT && h->timeout > 0) | ||||
| 	h->timeout--; | ||||
| } | ||||
|  | ||||
| static void | ||||
| wait_process(heim_object_t obj, void *ctx, int *stop) | ||||
| { | ||||
| @@ -1022,7 +1047,7 @@ wait_response(krb5_context context, int *action, krb5_sendto_ctx ctx) | ||||
|     wait_ctx.ctx = ctx; | ||||
|     FD_ZERO(&wait_ctx.rfds); | ||||
|     FD_ZERO(&wait_ctx.wfds); | ||||
|     wait_ctx.max_fd = 0; | ||||
|     wait_ctx.max_fd = -1; | ||||
|  | ||||
|     /* oh, we have a reply, it must be a plugin that got it for us */ | ||||
|     if (ctx->response.length) { | ||||
| @@ -1048,6 +1073,17 @@ wait_response(krb5_context context, int *action, krb5_sendto_ctx ctx) | ||||
| 	return 0; | ||||
|     } | ||||
|  | ||||
|     if (wait_ctx.max_fd == -1) { | ||||
| 	/* | ||||
| 	 * If we don't find a host which can make progress, then | ||||
| 	 * we accelerate the process by moving all of the contestants | ||||
| 	 * up by 1s. | ||||
| 	 */ | ||||
| 	_krb5_debug(context, 5, "wait_response: moving the contestants forward"); | ||||
| 	heim_array_iterate_f(ctx->hosts, &wait_ctx, wait_accelerate); | ||||
| 	return 0; | ||||
|     } | ||||
|  | ||||
|     tv.tv_sec = 1; | ||||
|     tv.tv_usec = 0; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user