ipropd-master/slave: enhancements and bug fixes

- fix int/uint confusion and use unsigned integral types for time
 - improve messages
 - add --verbose option
 - attempt transaction recovery in ipropd-master during idle times
 - begin hardening daemons against dying at the slightest provocation
 - better recovery from various errors
 - daemons now restart automatically in most of the many error cases
   where the daemons still die
This commit is contained in:
Nicolas Williams
2016-02-03 17:35:16 -06:00
parent 20df2c8706
commit ebc1ad34ba
5 changed files with 531 additions and 254 deletions

View File

@@ -68,4 +68,13 @@ enum iprop_cmd { I_HAVE = 1,
extern sig_atomic_t exit_flag; extern sig_atomic_t exit_flag;
void setup_signal(void); void setup_signal(void);
/*
 * Exit status values used by the iprop daemons.  When a daemon runs
 * under restarter(), the restarter interprets the child's exit status
 * according to these values.
 */
enum ipropd_exit_code {
    IPROPD_DONE         = 0,    /* clean shutdown; restarter exits too */
    IPROPD_RESTART      = 1,    /* restart after a short (30 second) delay */
    IPROPD_RESTART_SLOW = 2,    /* restart after a long (2 minute) delay */
    IPROPD_FATAL        = 3     /* do not restart; restarter gives up */
};
int restarter(krb5_context, size_t *);
#endif /* __IPROP_H__ */ #endif /* __IPROP_H__ */

View File

@@ -32,7 +32,11 @@
*/ */
#include "iprop.h" #include "iprop.h"
RCSID("$Id$");
#if defined(HAVE_FORK) && defined(HAVE_WAITPID)
#include <sys/types.h>
#include <sys/wait.h>
#endif
sig_atomic_t exit_flag; sig_atomic_t exit_flag;
@@ -71,3 +75,191 @@ setup_signal(void)
#endif #endif
#endif #endif
} }
/*
* Fork a child to run the service, and restart it if it dies.
*
* Returns -1 if not supported, else a file descriptor that the service
* should select() for. Any events on that file descriptor should cause
* the caller to exit immediately, as that means that the restarter
* exited.
*
* The service's normal exit status values should be taken
* from enum ipropd_exit_code. IPROPD_FATAL causes the restarter to
* stop restarting the service and to exit.
*
* A count of restarts is output via the `countp' argument, if it is
* non-NULL. This is useful for testing this function (e.g., kill the
* restarter after N restarts and check that the child gets the signal
* sent to it).
*
* This requires fork() and waitpid() (otherwise returns -1). Ignoring
* SIGCHLD, of course, would be bad.
*
* We could support this on Windows by spawning a child with mostly the
* same arguments as the restarter process.
*/
int
restarter(krb5_context context, size_t *countp)
{
#if defined(HAVE_FORK) && defined(HAVE_WAITPID)
    struct timeval tmout;
    pid_t pid = -1;     /* -1 means "no live child to signal or reap" */
    pid_t wpid = -1;
    int status = 0;
    int fds[2];         /* child reads fds[0]; parent holds fds[1] open */
    int fds2[2];        /* parent reads fds2[0]; child holds fds2[1] open */
    size_t count = 0;
    fd_set readset;

    fds[0] = -1;
    fds[1] = -1;
    fds2[0] = -1;
    fds2[1] = -1;

    /* We must be able to waitpid() for the child */
    signal(SIGCHLD, SIG_DFL);

    while (!exit_flag) {
        /*
         * Close the pipe ends the parent kept open across the previous
         * iteration: the write end of `fds' and the read end of `fds2'.
         * (The other two ends were closed right after fork().)
         */
        if (fds[1] != -1)
            (void) close(fds[1]);
        if (fds2[0] != -1)
            (void) close(fds2[0]);

        /* A pipe so the child can detect the parent's death */
        if (pipe(fds) == -1) {
            krb5_err(context, 1, errno,
                     "Could not setup pipes in service restarter");
        }

        /* A pipe so the parent can detect the child's death */
        if (pipe(fds2) == -1) {
            krb5_err(context, 1, errno,
                     "Could not setup pipes in service restarter");
        }

        /* Don't let buffered output be emitted twice (parent and child) */
        fflush(stdout);
        fflush(stderr);

        pid = fork();
        if (pid == -1)
            krb5_err(context, 1, errno, "Could not fork in service restarter");
        if (pid == 0) {
            /* Child: report the restart count and go run the service */
            if (countp != NULL)
                *countp = count;
            (void) close(fds[1]);
            (void) close(fds2[0]);
            return fds[0];
        }

        /* Parent: the restarter proper */
        count++;

        (void) close(fds[0]);
        (void) close(fds2[1]);

        do {
            wpid = waitpid(pid, &status, 0);
        } while (wpid == -1 && errno == EINTR && !exit_flag);
        if (wpid == -1 && errno == EINTR)
            break; /* We were signaled; gotta kill the child and exit */
        if (wpid == -1) {
            /* warn() and kill() may change errno; report the original */
            int save_errno = errno;

            if (save_errno != ECHILD) {
                warn("waitpid() failed; killing restarter's child process");
                kill(pid, SIGTERM);
            }
            krb5_err(context, 1, save_errno,
                     "restarter failed waiting for child");
        }

        assert(wpid == pid);
        wpid = -1;
        pid = -1;       /* child has been reaped */
        if (WIFEXITED(status)) {
            switch (WEXITSTATUS(status)) {
            case IPROPD_DONE:
                exit(0);
            case IPROPD_RESTART_SLOW:
                if (exit_flag)
                    exit(1);
                krb5_warnx(context, "Waiting 2 minutes to restart");
                sleep(120);
                continue;
            case IPROPD_FATAL:
                krb5_errx(context, WEXITSTATUS(status),
                          "Sockets and pipes not supported for "
                          "iprop log files");
            case IPROPD_RESTART:
            default:
                if (exit_flag)
                    exit(1);
                /* Add exponential backoff (with max backoff)? */
                krb5_warnx(context, "Waiting 30 seconds to restart");
                sleep(30);
                continue;
            }
        }
        /* else */
        krb5_warnx(context, "Child was killed; waiting 30 seconds to restart");
        sleep(30);
    }

    if (pid == -1)
        exit(0); /* No dead child to reap; done */

    assert(pid > 0);
    if (wpid != pid) {
        /* exit_flag holds the number of the signal that interrupted us */
        warnx("Interrupted; killing child (pid %ld) with %d",
              (long)pid, exit_flag);
        krb5_warnx(context, "Interrupted; killing child (pid %ld) with %d",
                   (long)pid, exit_flag);
        kill(pid, exit_flag);

        /* Wait up to one second for the child */
        tmout.tv_sec = 1;
        tmout.tv_usec = 0;
        FD_ZERO(&readset);
        FD_SET(fds2[0], &readset);
        /* We don't care why select() returns */
        (void) select(fds2[0] + 1, &readset, NULL, NULL, &tmout);
        /*
         * We haven't reaped the child yet; if it's a zombie, then
         * SIGKILLing it won't hurt.  If it's not a zombie yet, well,
         * we're out of patience.
         */
        kill(pid, SIGKILL);
        do {
            wpid = waitpid(pid, &status, 0);
        } while (wpid != pid && errno == EINTR);
        if (wpid == -1)
            krb5_err(context, 1, errno, "restarter failed waiting for child");
    }

    /* Finally, the child is dead and reaped */
    if (WIFEXITED(status))
        exit(WEXITSTATUS(status));
    if (WIFSIGNALED(status)) {
        switch (WTERMSIG(status)) {
        case SIGTERM:
        case SIGXCPU:
        case SIGINT:
            exit(0);
        default:
            /*
             * Attempt to set the same exit status for the parent as for
             * the child.
             */
            kill(getpid(), WTERMSIG(status));
            /*
             * We can get past the self-kill if we inherited a SIG_IGN
             * disposition that the child reset to SIG_DFL.
             */
        }
    }
    exit(1);
#else
    /* No fork()/waitpid(): the caller runs the service unsupervised */
    if (countp != NULL)
        *countp = 0;
    errno = ENOTSUP;
    return -1;
#endif
}

View File

@@ -36,6 +36,8 @@
static krb5_log_facility *log_facility; static krb5_log_facility *log_facility;
static int verbose;
const char *slave_stats_file; const char *slave_stats_file;
const char *slave_time_missing = "2 min"; const char *slave_time_missing = "2 min";
const char *slave_time_gone = "5 min"; const char *slave_time_gone = "5 min";
@@ -125,7 +127,7 @@ struct slave {
char *name; char *name;
krb5_auth_context ac; krb5_auth_context ac;
uint32_t version; uint32_t version;
int32_t version_tstamp; uint32_t version_tstamp;
time_t seen; time_t seen;
unsigned long flags; unsigned long flags;
#define SLAVE_F_DEAD 0x1 #define SLAVE_F_DEAD 0x1
@@ -331,7 +333,7 @@ dump_one (krb5_context context, HDB *db, hdb_entry_ex *entry, void *v)
ret = ENOMEM; ret = ENOMEM;
goto done; goto done;
} }
krb5_store_int32(sp, ONE_PRINC); krb5_store_uint32(sp, ONE_PRINC);
krb5_storage_free(sp); krb5_storage_free(sp);
ret = krb5_store_data(dump, data); ret = krb5_store_data(dump, data);
@@ -372,15 +374,15 @@ write_dump (krb5_context context, krb5_storage *dump,
ret = hdb_create (context, &db, database); ret = hdb_create (context, &db, database);
if (ret) if (ret)
krb5_err (context, 1, ret, "hdb_create: %s", database); krb5_err (context, IPROPD_RESTART, ret, "hdb_create: %s", database);
ret = db->hdb_open (context, db, O_RDONLY, 0); ret = db->hdb_open (context, db, O_RDONLY, 0);
if (ret) if (ret)
krb5_err (context, 1, ret, "db->open"); krb5_err (context, IPROPD_RESTART, ret, "db->open");
sp = krb5_storage_from_mem (buf, 4); sp = krb5_storage_from_mem (buf, 4);
if (sp == NULL) if (sp == NULL)
krb5_errx (context, 1, "krb5_storage_from_mem"); krb5_errx (context, IPROPD_RESTART, "krb5_storage_from_mem");
krb5_store_int32 (sp, TELL_YOU_EVERYTHING); krb5_store_uint32 (sp, TELL_YOU_EVERYTHING);
krb5_storage_free (sp); krb5_storage_free (sp);
data.data = buf; data.data = buf;
@@ -403,9 +405,9 @@ write_dump (krb5_context context, krb5_storage *dump,
sp = krb5_storage_from_mem (buf, 8); sp = krb5_storage_from_mem (buf, 8);
if (sp == NULL) if (sp == NULL)
krb5_errx (context, 1, "krb5_storage_from_mem"); krb5_errx (context, IPROPD_RESTART, "krb5_storage_from_mem");
krb5_store_int32 (sp, NOW_YOU_HAVE); krb5_store_uint32 (sp, NOW_YOU_HAVE);
krb5_store_int32 (sp, current_version); krb5_store_uint32 (sp, current_version);
krb5_storage_free (sp); krb5_storage_free (sp);
data.length = 8; data.length = 8;
@@ -459,7 +461,7 @@ write_dump (krb5_context context, krb5_storage *dump,
static int static int
send_complete (krb5_context context, slave *s, const char *database, send_complete (krb5_context context, slave *s, const char *database,
uint32_t current_version, uint32_t oldest_version, uint32_t current_version, uint32_t oldest_version,
int32_t initial_log_tstamp) uint32_t initial_log_tstamp)
{ {
krb5_error_code ret; krb5_error_code ret;
krb5_storage *dump = NULL; krb5_storage *dump = NULL;
@@ -527,6 +529,9 @@ send_complete (krb5_context context, slave *s, const char *database,
vno >= oldest_version && vno <= current_version) vno >= oldest_version && vno <= current_version)
break; break;
if (verbose)
krb5_warnx(context, "send_complete: dumping HDB");
/* /*
* Otherwise, we may need to write a new dump file. We * Otherwise, we may need to write a new dump file. We
* obtain an exclusive lock on the fd. Because this is * obtain an exclusive lock on the fd. Because this is
@@ -561,7 +566,7 @@ send_complete (krb5_context context, slave *s, const char *database,
if (fstat(fd, &st) == -1) { if (fstat(fd, &st) == -1) {
ret = errno; ret = errno;
krb5_warn(context, ret, "write_dump: could not stat dump file"); krb5_warn(context, ret, "send_complete: could not stat dump file");
goto done; goto done;
} }
@@ -648,7 +653,7 @@ send_are_you_there (krb5_context context, slave *s)
slave_dead(context, s); slave_dead(context, s);
return 1; return 1;
} }
krb5_store_int32 (sp, ARE_YOU_THERE); krb5_store_uint32 (sp, ARE_YOU_THERE);
krb5_storage_free (sp); krb5_storage_free (sp);
ret = krb5_write_priv_message(context, s->ac, &s->fd, &data); ret = krb5_write_priv_message(context, s->ac, &s->fd, &data);
@@ -665,12 +670,12 @@ send_are_you_there (krb5_context context, slave *s)
static int static int
send_diffs (kadm5_server_context *server_context, slave *s, int log_fd, send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
const char *database, uint32_t current_version, const char *database, uint32_t current_version,
int32_t current_tstamp) uint32_t current_tstamp)
{ {
krb5_context context = server_context->context; krb5_context context = server_context->context;
krb5_storage *sp; krb5_storage *sp;
uint32_t ver, initial_version, initial_version2; uint32_t ver, initial_version, initial_version2;
int32_t initial_tstamp, initial_tstamp2; uint32_t initial_tstamp, initial_tstamp2;
enum kadm_ops op; enum kadm_ops op;
uint32_t len; uint32_t len;
off_t right, left; off_t right, left;
@@ -679,7 +684,7 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
int ret = 0; int ret = 0;
if (s->flags & SLAVE_F_DEAD) { if (s->flags & SLAVE_F_DEAD) {
krb5_warnx(context, "not sending diffs to a dead slave"); krb5_warnx(context, "not sending diffs to dead slave %s", s->name);
return 0; return 0;
} }
@@ -688,8 +693,8 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
sp = krb5_storage_from_mem(buf, 4); sp = krb5_storage_from_mem(buf, 4);
if (sp == NULL) if (sp == NULL)
krb5_errx(context, 1, "krb5_storage_from_mem"); krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_mem");
krb5_store_int32(sp, YOU_HAVE_LAST_VERSION); krb5_store_uint32(sp, YOU_HAVE_LAST_VERSION);
krb5_storage_free(sp); krb5_storage_free(sp);
data.data = buf; data.data = buf;
data.length = 4; data.length = 4;
@@ -703,7 +708,8 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
return ret; return ret;
} }
krb5_warnx(context, "sending diffs to a live-seeming slave"); if (verbose)
krb5_warnx(context, "sending diffs to live-seeming slave %s", s->name);
/* /*
* XXX The code that makes the diffs should be made a separate function, * XXX The code that makes the diffs should be made a separate function,
@@ -748,7 +754,7 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
for (;;) { for (;;) {
ret = kadm5_log_previous (context, sp, &ver, NULL, &op, &len); ret = kadm5_log_previous (context, sp, &ver, NULL, &op, &len);
if (ret) if (ret)
krb5_err(context, 1, ret, krb5_err(context, IPROPD_RESTART, ret,
"send_diffs: failed to find previous entry"); "send_diffs: failed to find previous entry");
left = krb5_storage_seek(sp, -16, SEEK_CUR); left = krb5_storage_seek(sp, -16, SEEK_CUR);
if (left == (off_t)-1) { if (left == (off_t)-1) {
@@ -836,7 +842,7 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
send_are_you_there(context, s); send_are_you_there(context, s);
return 1; return 1;
} }
krb5_store_int32 (sp, FOR_YOU); krb5_store_uint32 (sp, FOR_YOU);
krb5_storage_free(sp); krb5_storage_free(sp);
ret = krb5_write_priv_message(context, s->ac, &s->fd, &data); ret = krb5_write_priv_message(context, s->ac, &s->fd, &data);
@@ -851,7 +857,7 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
s->version = current_version; s->version = current_version;
krb5_warnx(context, "slave is now up to date"); krb5_warnx(context, "slave %s is now up to date (%u)", s->name, s->version);
return 0; return 0;
} }
@@ -859,13 +865,13 @@ send_diffs (kadm5_server_context *server_context, slave *s, int log_fd,
static int static int
process_msg (kadm5_server_context *server_context, slave *s, int log_fd, process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
const char *database, uint32_t current_version, const char *database, uint32_t current_version,
int32_t current_tstamp) uint32_t current_tstamp)
{ {
krb5_context context = server_context->context; krb5_context context = server_context->context;
int ret = 0; int ret = 0;
krb5_data out; krb5_data out;
krb5_storage *sp; krb5_storage *sp;
int32_t tmp; uint32_t tmp;
ret = krb5_read_priv_message(context, s->ac, &s->fd, &out); ret = krb5_read_priv_message(context, s->ac, &s->fd, &out);
if(ret) { if(ret) {
@@ -879,37 +885,41 @@ process_msg (kadm5_server_context *server_context, slave *s, int log_fd,
krb5_data_free(&out); krb5_data_free(&out);
return 1; return 1;
} }
if (krb5_ret_int32(sp, &tmp) != 0) { if (krb5_ret_uint32(sp, &tmp) != 0) {
krb5_warnx(context, "process_msg: client send too short command"); krb5_warnx(context, "process_msg: client send too short command");
krb5_data_free(&out); krb5_data_free(&out);
return 1; return 1;
} }
switch (tmp) { switch (tmp) {
case I_HAVE : case I_HAVE :
ret = krb5_ret_int32(sp, &tmp); ret = krb5_ret_uint32(sp, &tmp);
if (ret != 0) { if (ret != 0) {
krb5_warnx(context, "process_msg: client send too I_HAVE data"); krb5_warnx(context, "process_msg: client send too little I_HAVE data");
break; break;
} }
/* new started slave that have old log */ /* new started slave that have old log */
if (s->version == 0 && tmp != 0) { if (s->version == 0 && tmp != 0) {
if (current_version < (uint32_t)tmp) { if (current_version < tmp) {
krb5_warnx(context, "Slave %s (version %lu) have later version " krb5_warnx(context, "Slave %s (version %u) have later version "
"the master (version %lu) OUT OF SYNC", "the master (version %u) OUT OF SYNC",
s->name, (unsigned long)tmp, s->name, tmp, current_version);
(unsigned long)current_version);
} }
if (verbose)
krb5_warnx(context, "slave %s updated from %u to %u",
s->name, s->version, tmp);
s->version = tmp; s->version = tmp;
} }
if ((uint32_t)tmp < s->version) { if (tmp < s->version) {
krb5_warnx(context, "Slave claims to not have " krb5_warnx(context, "Slave %s claims to not have "
"version we already sent to it"); "version we already sent to it", s->name);
s->version = tmp; s->version = tmp;
} }
ret = send_diffs(server_context, s, log_fd, database, current_version, ret = send_diffs(server_context, s, log_fd, database, current_version,
current_tstamp); current_tstamp);
break; break;
case I_AM_HERE : case I_AM_HERE :
if (verbose)
krb5_warnx(context, "slave %s is there", s->name);
break; break;
case ARE_YOU_THERE: case ARE_YOU_THERE:
case FOR_YOU : case FOR_YOU :
@@ -1073,6 +1083,7 @@ static struct getargs args[] = {
"private argument, do not use", NULL }, "private argument, do not use", NULL },
{ "hostname", 0, arg_string, rk_UNCONST(&master_hostname), { "hostname", 0, arg_string, rk_UNCONST(&master_hostname),
"hostname of master (if not same as hostname)", "hostname" }, "hostname of master (if not same as hostname)", "hostname" },
{ "verbose", 0, arg_flag, &verbose, NULL, NULL },
{ "version", 0, arg_flag, &version_flag, NULL, NULL }, { "version", 0, arg_flag, &version_flag, NULL, NULL },
{ "help", 0, arg_flag, &help_flag, NULL, NULL } { "help", 0, arg_flag, &help_flag, NULL, NULL }
}; };
@@ -1090,11 +1101,13 @@ main(int argc, char **argv)
int log_fd; int log_fd;
slave *slaves = NULL; slave *slaves = NULL;
uint32_t current_version = 0, old_version = 0; uint32_t current_version = 0, old_version = 0;
int32_t current_tstamp = 0; uint32_t current_tstamp = 0;
krb5_keytab keytab; krb5_keytab keytab;
char **files; char **files;
int aret; int aret;
int optidx = 0; int optidx = 0;
int restarter_fd = -1;
struct stat st;
setprogname(argv[0]); setprogname(argv[0]);
@@ -1173,8 +1186,9 @@ main(int argc, char **argv)
krb5_err (context, 1, errno, "open %s", krb5_err (context, 1, errno, "open %s",
server_context->log_context.log_file); server_context->log_context.log_file);
signal_fd = make_signal_socket (context); if (fstat(log_fd, &st) == -1)
listen_fd = make_listen_socket (context, port_str); krb5_err(context, 1, errno, "stat %s",
server_context->log_context.log_file);
if (flock(log_fd, LOCK_SH) == -1) if (flock(log_fd, LOCK_SH) == -1)
krb5_err(context, 1, errno, "shared flock %s", krb5_err(context, 1, errno, "shared flock %s",
@@ -1183,10 +1197,14 @@ main(int argc, char **argv)
&current_version, &current_tstamp); &current_version, &current_tstamp);
flock(log_fd, LOCK_UN); flock(log_fd, LOCK_UN);
signal_fd = make_signal_socket (context);
listen_fd = make_listen_socket (context, port_str);
krb5_warnx(context, "ipropd-master started at version: %lu", krb5_warnx(context, "ipropd-master started at version: %lu",
(unsigned long)current_version); (unsigned long)current_version);
roken_detach_finish(NULL, daemon_child); roken_detach_finish(NULL, daemon_child);
restarter_fd = restarter(context, NULL);
while (exit_flag == 0){ while (exit_flag == 0){
slave *p; slave *p;
@@ -1194,10 +1212,12 @@ main(int argc, char **argv)
int max_fd = 0; int max_fd = 0;
struct timeval to = {30, 0}; struct timeval to = {30, 0};
uint32_t vers; uint32_t vers;
struct stat st2;;
#ifndef NO_LIMIT_FD_SETSIZE #ifndef NO_LIMIT_FD_SETSIZE
if (signal_fd >= FD_SETSIZE || listen_fd >= FD_SETSIZE) if (signal_fd >= FD_SETSIZE || listen_fd >= FD_SETSIZE ||
krb5_errx (context, 1, "fd too large"); restarter_fd >= FD_SETSIZE)
krb5_errx (context, IPROPD_RESTART, "fd too large");
#endif #endif
FD_ZERO(&readset); FD_ZERO(&readset);
@@ -1205,6 +1225,10 @@ main(int argc, char **argv)
max_fd = max(max_fd, signal_fd); max_fd = max(max_fd, signal_fd);
FD_SET(listen_fd, &readset); FD_SET(listen_fd, &readset);
max_fd = max(max_fd, listen_fd); max_fd = max(max_fd, listen_fd);
if (restarter_fd > -1) {
FD_SET(restarter_fd, &readset);
max_fd = max(max_fd, restarter_fd);
}
for (p = slaves; p != NULL; p = p->next) { for (p = slaves; p != NULL; p = p->next) {
if (p->flags & SLAVE_F_DEAD) if (p->flags & SLAVE_F_DEAD)
@@ -1219,7 +1243,7 @@ main(int argc, char **argv)
if (errno == EINTR) if (errno == EINTR)
continue; continue;
else else
krb5_err (context, 1, errno, "select"); krb5_err (context, IPROPD_RESTART, errno, "select");
} }
if (stat(server_context->log_context.log_file, &st2) == -1) { if (stat(server_context->log_context.log_file, &st2) == -1) {
@@ -1232,15 +1256,15 @@ main(int argc, char **argv)
log_fd = open(server_context->log_context.log_file, O_RDONLY, 0); log_fd = open(server_context->log_context.log_file, O_RDONLY, 0);
if (log_fd < 0) if (log_fd < 0)
krb5_err(context, 1, 1, "open %s", krb5_err(context, 1, IPROPD_RESTART_SLOW, "open %s",
server_context->log_context.log_file); server_context->log_context.log_file);
if (fstat(log_fd, &st) == -1) if (fstat(log_fd, &st) == -1)
krb5_err(context, 1, errno, "stat %s", krb5_err(context, IPROPD_RESTART_SLOW, errno, "stat %s",
server_context->log_context.log_file); server_context->log_context.log_file);
if (flock(log_fd, LOCK_SH) == -1) if (flock(log_fd, LOCK_SH) == -1)
krb5_err(context, 1, errno, "shared flock %s", krb5_err(context, IPROPD_RESTART, errno, "shared flock %s",
server_context->log_context.log_file); server_context->log_context.log_file);
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
&current_version, &current_tstamp); &current_version, &current_tstamp);
@@ -1252,10 +1276,9 @@ main(int argc, char **argv)
if (kadm5_log_init_nb(server_context) == 0) if (kadm5_log_init_nb(server_context) == 0)
kadm5_log_end(server_context); kadm5_log_end(server_context);
if (flock(log_fd, LOCK_SH) == -1) { if (flock(log_fd, LOCK_SH) == -1)
krb5_err(context, 1, errno, krb5_err(context, IPROPD_RESTART, errno,
"could not lock log file"); "could not lock log file");
}
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
&current_version, &current_tstamp); &current_version, &current_tstamp);
flock(log_fd, LOCK_UN); flock(log_fd, LOCK_UN);
@@ -1274,6 +1297,11 @@ main(int argc, char **argv)
} }
} }
if (ret && FD_ISSET(restarter_fd, &readset)) {
exit_flag = SIGTERM;
break;
}
if (ret && FD_ISSET(signal_fd, &readset)) { if (ret && FD_ISSET(signal_fd, &readset)) {
#ifndef NO_UNIX_SOCKETS #ifndef NO_UNIX_SOCKETS
struct sockaddr_un peer_addr; struct sockaddr_un peer_addr;
@@ -1291,7 +1319,7 @@ main(int argc, char **argv)
assert(ret >= 0); assert(ret >= 0);
old_version = current_version; old_version = current_version;
if (flock(log_fd, LOCK_SH) == -1) if (flock(log_fd, LOCK_SH) == -1)
krb5_err(context, 1, errno, "shared flock %s", krb5_err(context, IPROPD_RESTART, errno, "shared flock %s",
server_context->log_context.log_file); server_context->log_context.log_file);
kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST, kadm5_log_get_version_fd(server_context, log_fd, LOG_VERSION_LAST,
&current_version, &current_tstamp); &current_version, &current_tstamp);

View File

@@ -37,6 +37,8 @@ RCSID("$Id$");
static const char *config_name = "ipropd-slave"; static const char *config_name = "ipropd-slave";
static int verbose;
static krb5_log_facility *log_facility; static krb5_log_facility *log_facility;
static char five_min[] = "5 min"; static char five_min[] = "5 min";
static char *server_time_lost = five_min; static char *server_time_lost = five_min;
@@ -164,18 +166,123 @@ ihave(krb5_context context, krb5_auth_context auth_context,
krb5_data data; krb5_data data;
sp = krb5_storage_from_mem(buf, 8); sp = krb5_storage_from_mem(buf, 8);
krb5_store_int32(sp, I_HAVE); krb5_store_uint32(sp, I_HAVE);
krb5_store_int32(sp, version); krb5_store_uint32(sp, version);
krb5_storage_free(sp); krb5_storage_free(sp);
data.length = 8; data.length = 8;
data.data = buf; data.data = buf;
if (verbose)
krb5_warnx(context, "telling master we are at %u", version);
ret = krb5_write_priv_message(context, auth_context, &fd, &data); ret = krb5_write_priv_message(context, auth_context, &fd, &data);
if (ret) if (ret)
krb5_warn(context, ret, "krb5_write_message"); krb5_warn(context, ret, "krb5_write_message");
return ret; return ret;
} }
/*
 * Append `slen' bytes of log entries, taken from `sp' starting at
 * offset `start', to the on-disk iprop log and fsync() it.
 *
 * Returns 0 on success or if there is nothing to append, and an error
 * code otherwise.  If the write or fsync() fails we try to put the log
 * back the way it was (truncate; failing that, unlink and re-create
 * it).  Out-of-space style errors (ENOSPC, EDQUOT, EFBIG) are returned
 * to the caller after the log has been reset; any other write error
 * makes the process exit with IPROPD_FATAL.
 */
static int
append_to_log_file(krb5_context context,
                   kadm5_server_context *server_context,
                   krb5_storage *sp, off_t start, ssize_t slen)
{
    size_t len;
    ssize_t sret;
    off_t log_off;
    int ret, ret2;
    int save_errno;
    void *buf;

    if (verbose)
        krb5_warnx(context, "appending diffs to log");

    if (slen == 0)
        return 0;
    if (slen < 0)
        return EINVAL;
    len = slen;
    if (len != slen)
        return EOVERFLOW;

    buf = malloc(len);
    if (buf == NULL && len != 0) {
        krb5_warn(context, errno, "malloc: no memory");
        return ENOMEM;
    }

    if (krb5_storage_seek(sp, start, SEEK_SET) != start) {
        krb5_errx(context, IPROPD_RESTART,
                  "krb5_storage_seek() failed"); /* can't happen */
    }
    sret = krb5_storage_read(sp, buf, len);
    if (sret < 0) {
        /* Capture errno before free() gets a chance to change it */
        ret = errno;
        free(buf);
        return ret;
    }
    if (len != (size_t)sret) {
        /* Can't happen */
        krb5_errx(context, IPROPD_RESTART,
                  "short krb5_storage_read() from memory buffer");
    }

    /* Remember where the log ended so we can truncate back on failure */
    log_off = lseek(server_context->log_context.log_fd, 0, SEEK_CUR);

    /*
     * Use net_write() so we get an errno if less than len bytes were
     * written.
     */
    sret = net_write(server_context->log_context.log_fd, buf, len);
    save_errno = errno;
    free(buf);
    if (sret != slen)
        ret = save_errno;
    else if (fsync(server_context->log_context.log_fd) == -1)
        ret = errno;    /* fsync() returns -1, not an error code */
    else
        ret = 0;
    if (ret == 0)
        return 0;

    /*
     * Attempt to recover from this.  First, truncate the log file
     * and reset the fd offset.  Failure to do this -> unlink the
     * log file and re-create it.  Since we're the slave, we ought to be
     * able to recover from the log being unlinked...
     */
    if (ftruncate(server_context->log_context.log_fd, log_off) == -1 ||
        lseek(server_context->log_context.log_fd, log_off, SEEK_SET) == -1) {
        (void) kadm5_log_end(server_context);
        if (unlink(server_context->log_context.log_file) == -1) {
            krb5_err(context, IPROPD_FATAL, errno,
                     "Failed to recover from failure to write log "
                     "entries from master to disk");
        }
        ret2 = kadm5_log_init(server_context);
        if (ret2) {
            krb5_err(context, IPROPD_RESTART_SLOW, ret2,
                     "Failed to initialize log to recover from "
                     "failure to write log entries from master to disk");
        }
    }
    if (ret == ENOSPC || ret == EFBIG
#ifdef EDQUOT
        || ret == EDQUOT
#endif
        ) {
        /* Unlink the file in these cases. */
        krb5_warn(context, ret,
                  "Failed to write log entries from master to disk");
        (void) kadm5_log_end(server_context);
        if (unlink(server_context->log_context.log_file) == -1) {
            krb5_err(context, IPROPD_FATAL, errno,
                     "Failed to recover from failure to write log "
                     "entries from master to disk");
        }
        ret2 = kadm5_log_init(server_context);
        if (ret2) {
            krb5_err(context, IPROPD_RESTART_SLOW, ret2,
                     "Failed to initialize log to recover from "
                     "failure to write log entries from master to disk");
        }
        return ret;
    }
    /*
     * All other errors we treat as fatal here.  This includes, for
     * example, EIO and EPIPE (sorry, can't log to pipes nor sockets).
     * krb5_err() exits with IPROPD_FATAL, so control does not reach the
     * end of this function.
     */
    krb5_err(context, IPROPD_FATAL, ret,
             "Failed to write log entries from master to disk");
}
static int static int
receive_loop (krb5_context context, receive_loop (krb5_context context,
krb5_storage *sp, krb5_storage *sp,
@@ -183,22 +290,31 @@ receive_loop (krb5_context context,
{ {
int ret; int ret;
off_t left, right, off; off_t left, right, off;
size_t mlen; uint32_t len, vers;
void *buf;
int32_t len, vers, vers2; if (verbose)
ssize_t sret, smlen; krb5_warnx(context, "receiving diffs");
/* /*
* Seek to the first entry in the message from the master that is * Seek to the first entry in the message from the master that is
* past the current version of the local database. * past the current version of the local database.
*/ */
do { do {
int32_t timestamp, tmp; uint32_t timestamp;
uint32_t op;
if (krb5_ret_int32(sp, &vers) != 0 || /*
krb5_ret_int32(sp, &timestamp) != 0 || * TODO We could do more to validate the entries from the master
krb5_ret_int32(sp, &tmp) != 0 || * here. And we could use/reuse more kadm5_log_*() code here.
krb5_ret_int32(sp, &len) != 0) { *
* Alternatively we should trust that the master sent us exactly
* what we needed and just write this to the log file and let
* kadm5_log_recover() do the rest.
*/
if (krb5_ret_uint32(sp, &vers) != 0 ||
krb5_ret_uint32(sp, &timestamp) != 0 ||
krb5_ret_uint32(sp, &op) != 0 ||
krb5_ret_uint32(sp, &len) != 0) {
/* /*
* This shouldn't happen. Reconnecting probably won't help * This shouldn't happen. Reconnecting probably won't help
@@ -208,19 +324,20 @@ receive_loop (krb5_context context,
krb5_warnx(context, "iprop entries from master were truncated"); krb5_warnx(context, "iprop entries from master were truncated");
return EINVAL; return EINVAL;
} }
if (len < 0) { if (vers > server_context->log_context.version) {
krb5_warnx(context, "master sent entry with negative length for"
"version %ld", (long)vers);
return EINVAL;
}
if ((uint32_t)vers > server_context->log_context.version)
break; break;
}
off = krb5_storage_seek(sp, 0, SEEK_CUR); off = krb5_storage_seek(sp, 0, SEEK_CUR);
if (krb5_storage_seek(sp, len + 8, SEEK_CUR) != off + len + 8) { if (krb5_storage_seek(sp, len + 8, SEEK_CUR) != off + len + 8) {
krb5_warnx(context, "iprop entries from master were truncated"); krb5_warnx(context, "iprop entries from master were truncated");
return 0; return 0;
} }
} while((uint32_t)vers <= server_context->log_context.version); if (verbose) {
krb5_warnx(context, "diff contains old log record version "
"%u %lld %u length %u",
vers, (long long)timestamp, op, len);
}
} while(vers <= server_context->log_context.version);
/* /*
* Read the remaining entries into memory... * Read the remaining entries into memory...
@@ -233,161 +350,38 @@ receive_loop (krb5_context context,
return EINVAL; return EINVAL;
} }
mlen = (size_t)(right - left);
smlen = right - left;
buf = malloc (mlen);
if (buf == NULL && mlen != 0) {
krb5_warn(context, errno, "malloc: no memory");
return ENOMEM;
}
/* /*
* ...and then write them out to the on-disk log. * ...and then write them out to the on-disk log.
*/ */
/* NOTE: We haven't validated the entries yet */
if (krb5_storage_seek(sp, left, SEEK_SET) != left) ret = append_to_log_file(context, server_context, sp, left, right - left);
krb5_errx(context, 1, "krb5_storage_seek() failed"); if (ret)
sret = krb5_storage_read(sp, buf, mlen); return ret;
if (sret < 0)
return errno;
if (mlen != (size_t)sret)
krb5_errx(context, 1, "short krb5_storage_read() from memory buffer");
sret = write(server_context->log_context.log_fd, buf, mlen);
if (sret != smlen) {
/* This is probably ENOSPC. We can't recover. */
krb5_err(context, 1, errno, "Failed to write log to disk");
}
ret = fsync(server_context->log_context.log_fd);
if (ret) {
/* This is also probably ENOSPC. We can't recover. */
krb5_err(context, 1, errno, "Failed to sync log to disk");
}
free(buf);
/* /*
* Go back to the startpoint and commit the entries to the HDB. * Replay the new entries.
*/ */
krb5_storage_seek(sp, left, SEEK_SET); if (verbose)
krb5_warnx(context, "replaying entries from master");
ret = kadm5_log_recover(server_context, kadm_recover_replay); ret = kadm5_log_recover(server_context, kadm_recover_replay);
if (ret) { if (ret) {
krb5_warn(context, ret, "replay of entries from master failed"); krb5_warn(context, ret, "replay failed");
return ret; return ret;
} }
for (;;) { ret = kadm5_log_get_version(server_context, &vers);
int32_t len2, timestamp, tmp; if (ret) {
off_t cur, cur2; krb5_warn(context, ret,
enum kadm_ops op; "could not get log version after applying diffs!");
return ret;
}
if (verbose)
krb5_warnx(context, "slave at version %u", vers);
if (krb5_ret_int32(sp, &vers) != 0) if (vers != server_context->log_context.version) {
break; krb5_warnx(context, "slave's log_context version (%u) is "
ret = krb5_ret_int32(sp, &timestamp); "inconsistent with log's version (%u)",
if (ret) { server_context->log_context.version, vers);
krb5_warnx(context, "entry %ld: too short", (long)vers);
return EINVAL;
}
ret = krb5_ret_int32(sp, &tmp);
if (ret) {
krb5_warnx(context, "entry %ld: too short", (long)vers);
return EINVAL;
}
op = tmp;
ret = krb5_ret_int32(sp, &len);
if (ret) {
krb5_warnx(context, "entry %ld: too short", (long)vers);
return EINVAL;
}
if (len < 0) {
krb5_warnx(context, "entry %ld: negative length (%ld); "
"master is confused", (long)vers, (long)len);
return EINVAL;
}
cur = krb5_storage_seek(sp, 0, SEEK_CUR);
krb5_warnx(context, "replaying entry %d", (int)vers);
/*
* kadm5_log_replay() returns errors from among others, the HDB
* layer, which can return errors from the actual DBs, some of
* which return -1 and set errno, and some of which return
* system error codes.
*/
ret = kadm5_log_replay(server_context,
op, vers, len, sp);
if (ret == -1 && errno != 0)
ret = errno;
if (ret) {
const char *s = krb5_get_error_message(server_context->context, ret);
/*
* XXX We don't really know here whether the error is
* recoverable or not. Some HDB errors might be safe to
* ignore, and others will not be (e.g., any resulting from
* ENOSPC), but we can't tell which is which, particularly
* as errors from the databases are not mapped to HDB_ERR_*.
*
* We do our best to die if the error is not recoverable.
*/
switch (ret) {
#ifdef EDQUOT
case EDQUOT:
#endif
case ENOSPC:
case EPIPE:
case EINTR:
case EFBIG:
case EIO:
krb5_err(context, 1, ret, "kadm5_log_replay: %ld. Fatal write "
"error: %s (%d)", (long)vers,
s ? s : "unknown error", ret);
}
krb5_warnx(context,
"kadm5_log_replay: %ld. Replay failed. "
"Database out of sync?: %s (%d)",
(long)vers, s ? s : "unknown error", ret);
krb5_free_error_message(context, s);
}
{
/*
* Make sure that kadm5_log_replay() read the whole entry
* from sp and left the sp offset at the start of the
* trailer.
*/
cur2 = krb5_storage_seek(sp, 0, SEEK_CUR);
if (cur + len != cur2)
krb5_errx(context, 1,
"kadm5_log_reply version: %ld didn't read the whole entry",
(long)vers);
}
if (krb5_ret_int32(sp, &len2) != 0) {
krb5_warnx(context, "entry %ld: postamble too short; "
"master is confused", (long)vers);
return EINVAL;
}
if(krb5_ret_int32(sp, &vers2) != 0) {
krb5_warnx(context, "entry %ld: postamble too short; "
"master is confused", (long)vers);
return EINVAL;
}
if (len != len2) {
krb5_warnx(context, "entry %ld: len != len2; master is "
"confused", (long)vers);
return EINVAL;
}
if (vers != vers2) {
krb5_warnx(context, "entry %ld: vers != vers2; master is "
"confused", (long)vers);
return EINVAL;
}
/*
* Update version after each replay.
*/
server_context->log_context.version = vers;
kadm5_log_update_uber(server_context);
} }
return 0; return 0;
@@ -404,7 +398,7 @@ receive(krb5_context context,
server_context->db, server_context->db,
O_RDWR | O_CREAT, 0600); O_RDWR | O_CREAT, 0600);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->open"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->open");
ret2 = receive_loop(context, sp, server_context); ret2 = receive_loop(context, sp, server_context);
if (ret2) if (ret2)
@@ -412,7 +406,7 @@ receive(krb5_context context,
ret = server_context->db->hdb_close(context, server_context->db); ret = server_context->db->hdb_close(context, server_context->db);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->close"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->close");
return ret2; return ret2;
} }
@@ -427,19 +421,22 @@ send_im_here(krb5_context context, int fd,
ret = krb5_data_alloc(&data, 4); ret = krb5_data_alloc(&data, 4);
if (ret) if (ret)
krb5_err(context, 1, ret, "send_im_here"); krb5_err(context, IPROPD_RESTART, ret, "send_im_here");
sp = krb5_storage_from_data (&data); sp = krb5_storage_from_data (&data);
if (sp == NULL) if (sp == NULL)
krb5_errx(context, 1, "krb5_storage_from_data"); krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_data");
krb5_store_int32(sp, I_AM_HERE); krb5_store_uint32(sp, I_AM_HERE);
krb5_storage_free(sp); krb5_storage_free(sp);
ret = krb5_write_priv_message(context, auth_context, &fd, &data); ret = krb5_write_priv_message(context, auth_context, &fd, &data);
krb5_data_free(&data); krb5_data_free(&data);
if (ret) if (ret)
krb5_err(context, 1, ret, "krb5_write_priv_message"); krb5_err(context, IPROPD_RESTART, ret, "krb5_write_priv_message");
if (verbose)
krb5_warnx(context, "pinged master");
return; return;
} }
@@ -447,13 +444,16 @@ send_im_here(krb5_context context, int fd,
static void static void
reinit_log(krb5_context context, reinit_log(krb5_context context,
kadm5_server_context *server_context, kadm5_server_context *server_context,
int32_t vno) uint32_t vno)
{ {
krb5_error_code ret; krb5_error_code ret;
if (verbose)
krb5_warnx(context, "truncating log on slave");
ret = kadm5_log_reinit(server_context); ret = kadm5_log_reinit(server_context);
if (ret) if (ret)
krb5_err(context, 1, ret, "kadm5_log_reinit"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "kadm5_log_reinit");
} }
@@ -464,8 +464,8 @@ receive_everything(krb5_context context, int fd,
{ {
int ret; int ret;
krb5_data data; krb5_data data;
int32_t vno = 0; uint32_t vno = 0;
int32_t opcode; uint32_t opcode;
krb5_storage *sp; krb5_storage *sp;
char *dbname; char *dbname;
@@ -475,22 +475,22 @@ receive_everything(krb5_context context, int fd,
ret = asprintf(&dbname, "%s-NEW", server_context->db->hdb_name); ret = asprintf(&dbname, "%s-NEW", server_context->db->hdb_name);
if (ret == -1) if (ret == -1)
krb5_err(context, 1, ENOMEM, "asprintf"); krb5_err(context, IPROPD_RESTART, ENOMEM, "asprintf");
ret = hdb_create(context, &mydb, dbname); ret = hdb_create(context, &mydb, dbname);
if(ret) if(ret)
krb5_err(context,1, ret, "hdb_create"); krb5_err(context, IPROPD_RESTART, ret, "hdb_create");
free(dbname); free(dbname);
ret = hdb_set_master_keyfile(context, ret = hdb_set_master_keyfile(context,
mydb, server_context->config.stash_file); mydb, server_context->config.stash_file);
if(ret) if(ret)
krb5_err(context,1, ret, "hdb_set_master_keyfile"); krb5_err(context, IPROPD_RESTART, ret, "hdb_set_master_keyfile");
/* I really want to use O_EXCL here, but given that I can't easily clean /* I really want to use O_EXCL here, but given that I can't easily clean
up on error, I won't */ up on error, I won't */
ret = mydb->hdb_open(context, mydb, O_RDWR | O_CREAT | O_TRUNC, 0600); ret = mydb->hdb_open(context, mydb, O_RDWR | O_CREAT | O_TRUNC, 0600);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->open"); krb5_err(context, IPROPD_RESTART, ret, "db->open");
sp = NULL; sp = NULL;
krb5_data_zero(&data); krb5_data_zero(&data);
@@ -504,8 +504,8 @@ receive_everything(krb5_context context, int fd,
sp = krb5_storage_from_data(&data); sp = krb5_storage_from_data(&data);
if (sp == NULL) if (sp == NULL)
krb5_errx(context, 1, "krb5_storage_from_data"); krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_data");
krb5_ret_int32(sp, &opcode); krb5_ret_uint32(sp, &opcode);
if (opcode == ONE_PRINC) { if (opcode == ONE_PRINC) {
krb5_data fake_data; krb5_data fake_data;
hdb_entry_ex entry; hdb_entry_ex entry;
@@ -519,12 +519,12 @@ receive_everything(krb5_context context, int fd,
ret = hdb_value2entry(context, &fake_data, &entry.entry); ret = hdb_value2entry(context, &fake_data, &entry.entry);
if (ret) if (ret)
krb5_err(context, 1, ret, "hdb_value2entry"); krb5_err(context, IPROPD_RESTART, ret, "hdb_value2entry");
ret = mydb->hdb_store(server_context->context, ret = mydb->hdb_store(server_context->context,
mydb, mydb,
0, &entry); 0, &entry);
if (ret) if (ret)
krb5_err(context, 1, ret, "hdb_store"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "hdb_store");
hdb_free_entry(context, &entry); hdb_free_entry(context, &entry);
krb5_data_free(&data); krb5_data_free(&data);
@@ -535,20 +535,21 @@ receive_everything(krb5_context context, int fd,
} while (opcode == ONE_PRINC); } while (opcode == ONE_PRINC);
if (opcode != NOW_YOU_HAVE) if (opcode != NOW_YOU_HAVE)
krb5_errx(context, 1, "receive_everything: strange %d", opcode); krb5_errx(context, IPROPD_RESTART_SLOW,
"receive_everything: strange %d", opcode);
krb5_ret_int32(sp, &vno); krb5_ret_uint32(sp, &vno);
krb5_storage_free(sp); krb5_storage_free(sp);
reinit_log(context, server_context, vno); reinit_log(context, server_context, vno);
ret = mydb->hdb_close(context, mydb); ret = mydb->hdb_close(context, mydb);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->close"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->close");
ret = mydb->hdb_rename(context, mydb, server_context->db->hdb_name); ret = mydb->hdb_rename(context, mydb, server_context->db->hdb_name);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->rename"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->rename");
server_context->log_context.version = vno; server_context->log_context.version = vno;
@@ -558,11 +559,11 @@ receive_everything(krb5_context context, int fd,
krb5_data_free(&data); krb5_data_free(&data);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->close"); krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->close");
ret = mydb->hdb_destroy(context, mydb); ret = mydb->hdb_destroy(context, mydb);
if (ret) if (ret)
krb5_err(context, 1, ret, "db->destroy"); krb5_err(context, IPROPD_RESTART, ret, "db->destroy");
krb5_warnx(context, "receive complete database, version %ld", (long)vno); krb5_warnx(context, "receive complete database, version %ld", (long)vno);
return ret; return ret;
@@ -639,6 +640,7 @@ static struct getargs args[] = {
"private argument, do not use", NULL }, "private argument, do not use", NULL },
{ "hostname", 0, arg_string, rk_UNCONST(&slave_str), { "hostname", 0, arg_string, rk_UNCONST(&slave_str),
"hostname of slave (if not same as hostname)", "hostname" }, "hostname of slave (if not same as hostname)", "hostname" },
{ "verbose", 0, arg_flag, &verbose, NULL, NULL },
{ "version", 0, arg_flag, &version_flag, NULL, NULL }, { "version", 0, arg_flag, &version_flag, NULL, NULL },
{ "help", 0, arg_flag, &help_flag, NULL, NULL } { "help", 0, arg_flag, &help_flag, NULL, NULL }
}; };
@@ -671,6 +673,7 @@ main(int argc, char **argv)
time_t reconnect_max; time_t reconnect_max;
time_t reconnect; time_t reconnect;
time_t before = 0; time_t before = 0;
int restarter_fd = -1;
const char *master; const char *master;
@@ -783,11 +786,23 @@ main(int argc, char **argv)
slave_status(context, status_file, "ipropd-slave started"); slave_status(context, status_file, "ipropd-slave started");
roken_detach_finish(NULL, daemon_child); roken_detach_finish(NULL, daemon_child);
restarter_fd = restarter(context, NULL);
while (!exit_flag) { while (!exit_flag) {
struct timeval to;
time_t now, elapsed; time_t now, elapsed;
fd_set readset;
int connected = FALSE; int connected = FALSE;
#ifndef NO_LIMIT_FD_SETSIZE
if (restarter_fd >= FD_SETSIZE)
krb5_errx(context, IPROPD_RESTART, "fd too large");
#endif
FD_ZERO(&readset);
if (restarter_fd > -1)
FD_SET(restarter_fd, &readset);
now = time(NULL); now = time(NULL);
elapsed = now - before; elapsed = now - before;
@@ -795,7 +810,12 @@ main(int argc, char **argv)
time_t left = reconnect - elapsed; time_t left = reconnect - elapsed;
krb5_warnx(context, "sleeping %d seconds before " krb5_warnx(context, "sleeping %d seconds before "
"retrying to connect", (int)left); "retrying to connect", (int)left);
sleep(left); to.tv_sec = left;
to.tv_usec = 0;
if (select(restarter_fd + 1, &readset, NULL, NULL, &to) == 1) {
exit_flag = SIGTERM;
continue;
}
} }
before = now; before = now;
@@ -813,6 +833,8 @@ main(int argc, char **argv)
krb5_cc_destroy(context, ccache); krb5_cc_destroy(context, ccache);
get_creds(context, keytab_str, &ccache, master); get_creds(context, keytab_str, &ccache, master);
} }
if (verbose)
krb5_warnx(context, "authenticating to master");
ret = krb5_sendauth (context, &auth_context, &master_fd, ret = krb5_sendauth (context, &auth_context, &master_fd,
IPROP_VERSION, NULL, server, IPROP_VERSION, NULL, server,
AP_OPTS_MUTUAL_REQUIRED, NULL, NULL, AP_OPTS_MUTUAL_REQUIRED, NULL, NULL,
@@ -832,27 +854,34 @@ main(int argc, char **argv)
connected = TRUE; connected = TRUE;
if (verbose)
krb5_warnx(context, "connected to master");
slave_status(context, status_file, "connected to master, waiting instructions"); slave_status(context, status_file, "connected to master, waiting instructions");
while (connected && !exit_flag) { while (connected && !exit_flag) {
krb5_data out; krb5_data out;
krb5_storage *sp; krb5_storage *sp;
int32_t tmp; uint32_t tmp;
fd_set readset; int max_fd;
struct timeval to;
#ifndef NO_LIMIT_FD_SETSIZE #ifndef NO_LIMIT_FD_SETSIZE
if (master_fd >= FD_SETSIZE) if (master_fd >= FD_SETSIZE)
krb5_errx (context, 1, "fd too large"); krb5_errx(context, IPROPD_RESTART, "fd too large");
if (restarter_fd >= FD_SETSIZE)
krb5_errx(context, IPROPD_RESTART, "fd too large");
max_fd = max(restarter_fd, master_fd);
#endif #endif
FD_ZERO(&readset); FD_ZERO(&readset);
FD_SET(master_fd, &readset); FD_SET(master_fd, &readset);
if (restarter_fd != -1)
FD_SET(restarter_fd, &readset);
to.tv_sec = time_before_lost; to.tv_sec = time_before_lost;
to.tv_usec = 0; to.tv_usec = 0;
ret = select (master_fd + 1, ret = select (max_fd + 1,
&readset, NULL, NULL, &to); &readset, NULL, NULL, &to);
if (ret < 0) { if (ret < 0) {
if (errno == EINTR) if (errno == EINTR)
@@ -867,6 +896,18 @@ main(int argc, char **argv)
continue; continue;
} }
if (FD_ISSET(restarter_fd, &readset)) {
if (verbose)
krb5_warnx(context, "slave restarter exited");
exit_flag = SIGTERM;
}
if (!FD_ISSET(master_fd, &readset))
continue;
if (verbose)
krb5_warnx(context, "message from master");
ret = krb5_read_priv_message(context, auth_context, &master_fd, &out); ret = krb5_read_priv_message(context, auth_context, &master_fd, &out);
if (ret) { if (ret) {
krb5_warn(context, ret, "krb5_read_priv_message"); krb5_warn(context, ret, "krb5_read_priv_message");
@@ -876,8 +917,8 @@ main(int argc, char **argv)
sp = krb5_storage_from_mem (out.data, out.length); sp = krb5_storage_from_mem (out.data, out.length);
if (sp == NULL) if (sp == NULL)
krb5_err(context, 1, errno, "krb5_storage_from_mem"); krb5_err(context, IPROPD_RESTART, errno, "krb5_storage_from_mem");
ret = krb5_ret_int32(sp, &tmp); ret = krb5_ret_uint32(sp, &tmp);
if (ret == HEIM_ERR_EOF) { if (ret == HEIM_ERR_EOF) {
krb5_warn(context, ret, "master sent zero-length message"); krb5_warn(context, ret, "master sent zero-length message");
connected = FALSE; connected = FALSE;
@@ -891,11 +932,13 @@ main(int argc, char **argv)
ret = kadm5_log_init(server_context); ret = kadm5_log_init(server_context);
if (ret) { if (ret) {
krb5_err(context, 1, ret, "kadm5_log_init while handling a " krb5_err(context, IPROPD_RESTART, ret, "kadm5_log_init while "
"message from the master"); "handling a message from the master");
} }
switch (tmp) { switch (tmp) {
case FOR_YOU : case FOR_YOU :
if (verbose)
krb5_warnx(context, "master sent us diffs");
ret2 = receive(context, sp, server_context); ret2 = receive(context, sp, server_context);
if (ret2) if (ret2)
krb5_warn(context, ret, krb5_warn(context, ret,
@@ -913,6 +956,8 @@ main(int argc, char **argv)
is_up_to_date(context, status_file, server_context); is_up_to_date(context, status_file, server_context);
break; break;
case TELL_YOU_EVERYTHING : case TELL_YOU_EVERYTHING :
if (verbose)
krb5_warnx(context, "master sent us a full dump");
ret = receive_everything(context, master_fd, server_context, ret = receive_everything(context, master_fd, server_context,
auth_context); auth_context);
if (ret == 0) { if (ret == 0) {
@@ -925,6 +970,8 @@ main(int argc, char **argv)
is_up_to_date(context, status_file, server_context); is_up_to_date(context, status_file, server_context);
break; break;
case ARE_YOU_THERE : case ARE_YOU_THERE :
if (verbose)
krb5_warnx(context, "master sent us a ping");
is_up_to_date(context, status_file, server_context); is_up_to_date(context, status_file, server_context);
ret = ihave(context, auth_context, master_fd, ret = ihave(context, auth_context, master_fd,
server_context->log_context.version); server_context->log_context.version);
@@ -934,6 +981,8 @@ main(int argc, char **argv)
send_im_here(context, master_fd, auth_context); send_im_here(context, master_fd, auth_context);
break; break;
case YOU_HAVE_LAST_VERSION: case YOU_HAVE_LAST_VERSION:
if (verbose)
krb5_warnx(context, "master tells us we are up to date");
is_up_to_date(context, status_file, server_context); is_up_to_date(context, status_file, server_context);
break; break;
case NOW_YOU_HAVE : case NOW_YOU_HAVE :

View File

@@ -186,12 +186,11 @@ RCSID("$Id$");
* Preserves sp's offset on failure where possible. * Preserves sp's offset on failure where possible.
*/ */
static kadm5_ret_t static kadm5_ret_t
get_header(krb5_storage *sp, int peek, uint32_t *verp, int32_t *tstampp, get_header(krb5_storage *sp, int peek, uint32_t *verp, uint32_t *tstampp,
enum kadm_ops *opp, uint32_t *lenp) enum kadm_ops *opp, uint32_t *lenp)
{ {
krb5_error_code ret; krb5_error_code ret;
uint32_t op, len; uint32_t tstamp, op, len;
int32_t tstamp;
off_t off, new_off; off_t off, new_off;
if (tstampp == NULL) if (tstampp == NULL)
@@ -212,7 +211,7 @@ get_header(krb5_storage *sp, int peek, uint32_t *verp, int32_t *tstampp,
} }
if (ret) if (ret)
goto log_corrupt; goto log_corrupt;
ret = krb5_ret_int32(sp, tstampp); ret = krb5_ret_uint32(sp, tstampp);
if (ret) if (ret)
goto log_corrupt; goto log_corrupt;
@@ -330,7 +329,7 @@ seek_next(krb5_storage *sp)
krb5_error_code ret; krb5_error_code ret;
uint32_t ver, ver2, len, len2; uint32_t ver, ver2, len, len2;
enum kadm_ops op; enum kadm_ops op;
int32_t tstamp; uint32_t tstamp;
off_t off, off_len, new_off; off_t off, off_len, new_off;
off = krb5_storage_seek(sp, 0, SEEK_CUR); off = krb5_storage_seek(sp, 0, SEEK_CUR);
@@ -457,11 +456,11 @@ static krb5_storage *log_goto_first(kadm5_server_context *, int);
*/ */
kadm5_ret_t kadm5_ret_t
kadm5_log_get_version_fd(kadm5_server_context *server_context, int fd, kadm5_log_get_version_fd(kadm5_server_context *server_context, int fd,
int which, uint32_t *ver, int32_t *tstamp) int which, uint32_t *ver, uint32_t *tstamp)
{ {
kadm5_ret_t ret; kadm5_ret_t ret;
krb5_storage *sp; krb5_storage *sp;
int32_t tmp; uint32_t tmp;
if (fd == -1) if (fd == -1)
return 0; /* /dev/null */ return 0; /* /dev/null */
@@ -1522,7 +1521,7 @@ log_update_uber(kadm5_server_context *context, off_t off)
ret = krb5_store_uint64(mem_sp, off); ret = krb5_store_uint64(mem_sp, off);
if (ret) if (ret)
goto out; goto out;
ret = krb5_store_int32(mem_sp, log_context->last_time); ret = krb5_store_uint32(mem_sp, log_context->last_time);
if (ret) if (ret)
goto out; goto out;
ret = krb5_store_uint32(mem_sp, log_context->version); ret = krb5_store_uint32(mem_sp, log_context->version);
@@ -1809,7 +1808,7 @@ kadm5_log_foreach(kadm5_server_context *context,
for (;;) { for (;;) {
uint32_t ver, ver2, len, len2; uint32_t ver, ver2, len, len2;
int32_t tstamp; uint32_t tstamp;
time_t timestamp; time_t timestamp;
enum kadm_ops op; enum kadm_ops op;
@@ -1977,7 +1976,7 @@ kadm5_log_goto_end(kadm5_server_context *server_context, int fd)
krb5_storage *sp; krb5_storage *sp;
enum kadm_ops op; enum kadm_ops op;
uint32_t ver, len; uint32_t ver, len;
int32_t tstamp; uint32_t tstamp;
uint64_t off; uint64_t off;
if (fd == -1) { if (fd == -1) {
@@ -2075,7 +2074,7 @@ kadm5_log_previous(krb5_context context,
krb5_error_code ret; krb5_error_code ret;
off_t oldoff; off_t oldoff;
uint32_t ver2, len2; uint32_t ver2, len2;
int32_t tstamp; uint32_t tstamp;
oldoff = krb5_storage_seek(sp, 0, SEEK_CUR); oldoff = krb5_storage_seek(sp, 0, SEEK_CUR);
if (oldoff == -1) if (oldoff == -1)