Two-phase HDB commit via iprop log, + GC for log
We used to update the iprop log and HDB in different orders depending on the kadm5 operation, which then led to various race conditions. The iprop log now functions as a two-phase commit (with roll forward) log for HDB changes. The log is auto-truncated, keeping the latest entries that fit in a configurable maximum number of bytes (defaults to 50MB). See the log-max-size parameter description in krb5.conf(5). The iprop log format and the protocol remain backwards-compatible with earlier versions of Heimdal. This is NOT a flag-day; there is NO need to update all the slaves at once with the master, though it is advisable in general. Rolling upgrades and downgrades should work. The sequence of updates is now (with HDB and log open and locked): a) check that the HDB operation will succeed if attempted, b) append to iprop log and fsync() it, c) write to HDB (which should fsync()), d) mark last log record committed (no fsync in this case). Every kadm5 write operation recover transactions not yet confirmed as committed, thus there can be at most one unconfirmed commit on a master KDC. Reads via kadm5_get_principal() also attempt to lock the log, and if successful, recover unconfirmed transactions; readers must have write access and must win any race to lock the iprop log. The ipropd-master daemon also attempts to recover unconfirmed transactions when idle. The log now starts with a nop record whose payload records the offset of the logical end of the log: the end of the last confirmed committed transaction. This is kown as the "uber record". Its purpose is two-fold: act as the confirmation of committed transactions, and provide an O(1) method of finding the end of the log (i.e., without having to traverse the entire log front to back). Two-phase commit makes all kadm5 writes single-operation atomic transactions (though some kadm5 operations, such as renames of principals, and changes to principals' aliases, use multiple low-level HDB write operations, but still all in one transaction). One can still hold a lock on the HDB across many operations (e.g., by using the lock command in a kadmin -l or calling kadm5_lock()) in order to push multiple transactions in sequence, but this sequence will not be atomic if the process or host crashes in the middle. As before, HDB writes which do not go through the kadm5 API are excluded from all of this, but there should be no such writes. Lastly, the iprop-log(1) command is enhanced as follows: - The dump, last-version, truncate, and replay sub-commands now have an option to not lock the log. This is useful for inspecting a running system's log file, especially on slave KDCs. - The dump, last-version, truncate, and replay sub-commands now take an optional iprop log file positional argument, so that they may be used to inspect log files other than the running system's configured/default log file. Extensive code review and some re-writing for clarity by Viktor Dukhovni.
This commit is contained in:
@@ -321,6 +321,22 @@ _hdb_store(krb5_context context, HDB *db, unsigned flags, hdb_entry_ex *entry)
|
||||
if (code)
|
||||
return code;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK) && (flags & HDB_F_REPLACE))
|
||||
return 0;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
code = hdb_principal2key(context, entry->entry.principal, &key);
|
||||
if (code)
|
||||
return code;
|
||||
code = db->hdb__get(context, db, key, &value);
|
||||
krb5_data_free(&key);
|
||||
if (code == 0)
|
||||
krb5_data_free(&value);
|
||||
if (code == HDB_ERR_NOENTRY)
|
||||
return 0;
|
||||
return code ? code : HDB_ERR_EXISTS;
|
||||
}
|
||||
|
||||
if(entry->entry.generation == NULL) {
|
||||
struct timeval t;
|
||||
entry->entry.generation = malloc(sizeof(*entry->entry.generation));
|
||||
@@ -360,13 +376,32 @@ _hdb_store(krb5_context context, HDB *db, unsigned flags, hdb_entry_ex *entry)
|
||||
}
|
||||
|
||||
krb5_error_code
|
||||
_hdb_remove(krb5_context context, HDB *db, krb5_const_principal principal)
|
||||
_hdb_remove(krb5_context context, HDB *db,
|
||||
unsigned flags, krb5_const_principal principal)
|
||||
{
|
||||
krb5_data key;
|
||||
krb5_data key, value;
|
||||
int code;
|
||||
|
||||
hdb_principal2key(context, principal, &key);
|
||||
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
/*
|
||||
* We don't check that we can delete the aliases because we
|
||||
* assume that the DB is consistent. If we did check for alias
|
||||
* consistency we'd also have to provide a way to fsck the DB,
|
||||
* otherwise admins would have no way to recover -- papering
|
||||
* over this here is less work, but we really ought to provide
|
||||
* an HDB fsck.
|
||||
*/
|
||||
code = db->hdb__get(context, db, key, &value);
|
||||
krb5_data_free(&key);
|
||||
if (code == 0) {
|
||||
krb5_data_free(&value);
|
||||
return 0;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
code = hdb_remove_aliases(context, db, &key);
|
||||
if (code) {
|
||||
krb5_data_free(&key);
|
||||
|
25
lib/hdb/db.c
25
lib/hdb/db.c
@@ -211,6 +211,7 @@ DB__put(krb5_context context, HDB *db, int replace,
|
||||
k.size = key.length;
|
||||
v.data = value.data;
|
||||
v.size = value.length;
|
||||
krb5_clear_error_message(context);
|
||||
code = (*d->put)(d, &k, &v, replace ? 0 : R_NOOVERWRITE);
|
||||
if(code < 0) {
|
||||
code = errno;
|
||||
@@ -219,9 +220,15 @@ DB__put(krb5_context context, HDB *db, int replace,
|
||||
return code;
|
||||
}
|
||||
if(code == 1) {
|
||||
krb5_clear_error_message(context);
|
||||
return HDB_ERR_EXISTS;
|
||||
}
|
||||
code = (*d->sync)(d, 0);
|
||||
if (code == -1) {
|
||||
code = errno;
|
||||
krb5_set_error_message(context, code, "Database %s put sync error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
return code;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -233,15 +240,23 @@ DB__del(krb5_context context, HDB *db, krb5_data key)
|
||||
krb5_error_code code;
|
||||
k.data = key.data;
|
||||
k.size = key.length;
|
||||
krb5_clear_error_message(context);
|
||||
code = (*d->del)(d, &k, 0);
|
||||
if(code == 1) {
|
||||
if (code == 1)
|
||||
return HDB_ERR_NOENTRY;
|
||||
if (code < 0) {
|
||||
code = errno;
|
||||
krb5_set_error_message(context, code, "Database %s put error: %s",
|
||||
krb5_set_error_message(context, code, "Database %s del error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
return code;
|
||||
}
|
||||
code = (*d->sync)(d, 0);
|
||||
if (code == -1) {
|
||||
code = errno;
|
||||
krb5_set_error_message(context, code, "Database %s del sync error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
return code;
|
||||
}
|
||||
if(code < 0)
|
||||
return errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -235,8 +235,46 @@ DB__put(krb5_context context, HDB *db, int replace,
|
||||
code = (*d->put)(d, NULL, &k, &v, replace ? 0 : DB_NOOVERWRITE);
|
||||
if(code == DB_KEYEXIST)
|
||||
return HDB_ERR_EXISTS;
|
||||
if(code)
|
||||
return errno;
|
||||
if (code) {
|
||||
/*
|
||||
* Berkeley DB 3 and up have a terrible error reporting
|
||||
* interface...
|
||||
*
|
||||
* DB->err() doesn't output a string.
|
||||
* DB->set_errcall()'s callback function doesn't have a void *
|
||||
* argument that can be used to place the error somewhere.
|
||||
*
|
||||
* The only thing we could do is fopen()/fdopen() a file, set it
|
||||
* with DB->set_errfile(), then call DB->err(), then read the
|
||||
* message from the file, unset it with DB->set_errfile(), close
|
||||
* it and delete it. That's a lot of work... so we don't do it.
|
||||
*/
|
||||
if (code == EACCES || code == ENOSPC || code == EINVAL) {
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s put error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
} else {
|
||||
code = HDB_ERR_UK_SERROR;
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s put error: unknown (%d)",
|
||||
db->hdb_name, code);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
code = (*d->sync)(d, 0);
|
||||
if (code) {
|
||||
if (code == EACCES || code == ENOSPC || code == EINVAL) {
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s put sync error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
} else {
|
||||
code = HDB_ERR_UK_SERROR;
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s put sync error: unknown (%d)",
|
||||
db->hdb_name, code);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -253,8 +291,33 @@ DB__del(krb5_context context, HDB *db, krb5_data key)
|
||||
code = (*d->del)(d, NULL, &k, 0);
|
||||
if(code == DB_NOTFOUND)
|
||||
return HDB_ERR_NOENTRY;
|
||||
if(code)
|
||||
if (code) {
|
||||
if (code == EACCES || code == ENOSPC || code == EINVAL) {
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s del error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
} else {
|
||||
code = HDB_ERR_UK_SERROR;
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s del error: unknown (%d)",
|
||||
db->hdb_name, code);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
code = (*d->sync)(d, 0);
|
||||
if (code) {
|
||||
if (code == EACCES || code == ENOSPC || code == EINVAL) {
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s del sync error: %s",
|
||||
db->hdb_name, strerror(code));
|
||||
} else {
|
||||
code = HDB_ERR_UK_SERROR;
|
||||
krb5_set_error_message(context, code,
|
||||
"Database %s del sync error: unknown (%d)",
|
||||
db->hdb_name, code);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1735,6 +1735,9 @@ LDAP_store(krb5_context context, HDB * db, unsigned flags,
|
||||
LDAPMessage *msg = NULL, *e = NULL;
|
||||
char *dn = NULL, *name = NULL;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK))
|
||||
return 0; /* we can't guarantee whether we'll be able to perform it */
|
||||
|
||||
ret = LDAP_principal2message(context, db, entry->entry.principal, &msg);
|
||||
if (ret == 0)
|
||||
e = ldap_first_entry(HDB2LDAP(db), msg);
|
||||
@@ -1806,13 +1809,17 @@ LDAP_store(krb5_context context, HDB * db, unsigned flags,
|
||||
}
|
||||
|
||||
static krb5_error_code
|
||||
LDAP_remove(krb5_context context, HDB *db, krb5_const_principal principal)
|
||||
LDAP_remove(krb5_context context, HDB *db,
|
||||
unsigned flags, krb5_const_principal principal)
|
||||
{
|
||||
krb5_error_code ret;
|
||||
LDAPMessage *msg, *e;
|
||||
char *dn = NULL;
|
||||
int rc, limit = LDAP_NO_LIMIT;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK))
|
||||
return 0; /* we can't guarantee whether we'll be able to perform it */
|
||||
|
||||
ret = LDAP_principal2message(context, db, principal, &msg);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@@ -950,8 +950,24 @@ mdb_store(krb5_context context, HDB *db, unsigned flags, hdb_entry_ex *entry)
|
||||
krb5_data line = { 0, 0 };
|
||||
krb5_data kdb_ent = { 0, 0 };
|
||||
krb5_data key = { 0, 0 };
|
||||
krb5_data value = { 0, 0 };
|
||||
ssize_t sz;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK) && (flags & HDB_F_REPLACE))
|
||||
return 0;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
ret = mdb_principal2key(context, entry->entry.principal, &key);
|
||||
if (ret) return ret;
|
||||
code = db->hdb__get(context, db, key, &value);
|
||||
krb5_data_free(&key);
|
||||
if (code == 0)
|
||||
krb5_data_free(&value);
|
||||
if (code == HDB_ERR_NOENTRY)
|
||||
return 0;
|
||||
return code ? code : HDB_ERR_EXISTS;
|
||||
}
|
||||
|
||||
sp = krb5_storage_emem();
|
||||
if (!sp) return ENOMEM;
|
||||
ret = _hdb_set_master_key_usage(context, db, 0); /* MIT KDB uses KU 0 */
|
||||
@@ -989,11 +1005,22 @@ out:
|
||||
}
|
||||
|
||||
static krb5_error_code
|
||||
mdb_remove(krb5_context context, HDB *db, krb5_const_principal principal)
|
||||
mdb_remove(krb5_context context, HDB *db,
|
||||
unsigned flags, krb5_const_principal principal)
|
||||
{
|
||||
krb5_error_code code;
|
||||
krb5_data key;
|
||||
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
code = db->hdb__get(context, db, key, &value);
|
||||
krb5_data_free(&key);
|
||||
if (code == 0) {
|
||||
krb5_data_free(&value);
|
||||
return 0;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
mdb_principal2key(context, principal, &key);
|
||||
code = db->hdb__del(context, db, key);
|
||||
krb5_data_free(&key);
|
||||
|
@@ -672,24 +672,24 @@ hdb_sqlite_store(krb5_context context, HDB *db, unsigned flags,
|
||||
goto rollback;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
commit:
|
||||
|
||||
krb5_data_free(&value);
|
||||
|
||||
sqlite3_clear_bindings(get_ids);
|
||||
sqlite3_reset(get_ids);
|
||||
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
(void) hdb_sqlite_exec_stmt(context, hsdb, "ROLLBACK", ret);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = hdb_sqlite_exec_stmt(context, hsdb, "COMMIT", EINVAL);
|
||||
if(ret != SQLITE_OK)
|
||||
krb5_warnx(context, "hdb-sqlite: COMMIT problem: %d: %s",
|
||||
ret, sqlite3_errmsg(hsdb->db));
|
||||
|
||||
return ret;
|
||||
return ret == SQLITE_OK ? 0 : HDB_ERR_UK_SERROR;
|
||||
|
||||
rollback:
|
||||
|
||||
krb5_warnx(context, "hdb-sqlite: store rollback problem: %d: %s",
|
||||
ret, sqlite3_errmsg(hsdb->db));
|
||||
|
||||
@@ -865,27 +865,63 @@ hdb_sqlite_rename(krb5_context context, HDB *db, const char *new_name)
|
||||
*/
|
||||
static krb5_error_code
|
||||
hdb_sqlite_remove(krb5_context context, HDB *db,
|
||||
krb5_const_principal principal)
|
||||
unsigned flags, krb5_const_principal principal)
|
||||
{
|
||||
krb5_error_code ret;
|
||||
hdb_sqlite_db *hsdb = (hdb_sqlite_db*)(db->hdb_db);
|
||||
sqlite3_stmt *get_ids = hsdb->get_ids;
|
||||
sqlite3_stmt *rm = hsdb->remove;
|
||||
|
||||
bind_principal(context, principal, rm, 1);
|
||||
|
||||
ret = hdb_sqlite_exec_stmt(context, hsdb,
|
||||
"BEGIN IMMEDIATE TRANSACTION", EINVAL);
|
||||
if (ret != SQLITE_OK) {
|
||||
(void) hdb_sqlite_exec_stmt(context, hsdb, "ROLLBACK", EINVAL);
|
||||
ret = HDB_ERR_UK_SERROR;
|
||||
krb5_set_error_message(context, ret,
|
||||
"SQLite BEGIN TRANSACTION failed: %s",
|
||||
sqlite3_errmsg(hsdb->db));
|
||||
return ret;
|
||||
}
|
||||
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
ret = bind_principal(context, principal, get_ids, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = hdb_sqlite_step(context, hsdb->db, get_ids);
|
||||
sqlite3_clear_bindings(get_ids);
|
||||
sqlite3_reset(get_ids);
|
||||
if (ret == SQLITE_DONE) {
|
||||
(void) hdb_sqlite_exec_stmt(context, hsdb, "ROLLBACK", EINVAL);
|
||||
return HDB_ERR_NOENTRY;
|
||||
}
|
||||
}
|
||||
|
||||
ret = hdb_sqlite_step(context, hsdb->db, rm);
|
||||
sqlite3_clear_bindings(rm);
|
||||
sqlite3_reset(rm);
|
||||
if (ret != SQLITE_DONE) {
|
||||
ret = EINVAL;
|
||||
(void) hdb_sqlite_exec_stmt(context, hsdb, "ROLLBACK", ret);
|
||||
ret = HDB_ERR_UK_SERROR;
|
||||
krb5_set_error_message(context, ret,
|
||||
"sqlite remove failed: %d",
|
||||
ret);
|
||||
} else
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
sqlite3_clear_bindings(rm);
|
||||
sqlite3_reset(rm);
|
||||
if ((flags & HDB_F_PRECHECK)) {
|
||||
(void) hdb_sqlite_exec_stmt(context, hsdb, "ROLLBACK", EINVAL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
ret = hdb_sqlite_exec_stmt(context, hsdb, "COMMIT", HDB_ERR_UK_SERROR);
|
||||
if (ret != SQLITE_OK)
|
||||
krb5_warnx(context, "hdb-sqlite: COMMIT problem: %ld: %s",
|
||||
(long)HDB_ERR_UK_SERROR, sqlite3_errmsg(hsdb->db));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -65,6 +65,7 @@ enum hdb_lockop{ HDB_RLOCK, HDB_WLOCK };
|
||||
#define HDB_F_ALL_KVNOS 2048 /* we want all the keys, live or not */
|
||||
#define HDB_F_FOR_AS_REQ 4096 /* fetch is for a AS REQ */
|
||||
#define HDB_F_FOR_TGS_REQ 8192 /* fetch is for a TGS REQ */
|
||||
#define HDB_F_PRECHECK 16384 /* check that the operation would succeed */
|
||||
|
||||
/* hdb_capability_flags */
|
||||
#define HDB_CAP_F_HANDLE_ENTERPRISE_PRINCIPAL 1
|
||||
@@ -154,7 +155,7 @@ typedef struct HDB {
|
||||
* Remove an entry from the database.
|
||||
*/
|
||||
krb5_error_code (*hdb_remove)(krb5_context, struct HDB*,
|
||||
krb5_const_principal);
|
||||
unsigned, krb5_const_principal);
|
||||
/**
|
||||
* As part of iteration, fetch one entry
|
||||
*/
|
||||
@@ -186,25 +187,33 @@ typedef struct HDB {
|
||||
/**
|
||||
* Get an hdb_entry from a classical DB backend
|
||||
*
|
||||
* If the database is a classical DB (ie BDB, NDBM, GDBM, etc)
|
||||
* backend, this function will take a principal key (krb5_data)
|
||||
* and return all data related to principal in the return
|
||||
* krb5_data. The returned encoded entry is of type hdb_entry or
|
||||
* hdb_entry_alias.
|
||||
* This function takes a principal key (krb5_data) and returns all
|
||||
* data related to principal in the return krb5_data. The returned
|
||||
* encoded entry is of type hdb_entry or hdb_entry_alias.
|
||||
*/
|
||||
krb5_error_code (*hdb__get)(krb5_context, struct HDB*,
|
||||
krb5_data, krb5_data*);
|
||||
/**
|
||||
* Store an hdb_entry from a classical DB backend
|
||||
*
|
||||
* Same discussion as in @ref HDB::hdb__get
|
||||
* This function takes a principal key (krb5_data) and encoded
|
||||
* hdb_entry or hdb_entry_alias as the data to store.
|
||||
*
|
||||
* For a file-based DB, this must synchronize to disk when done.
|
||||
* This is sub-optimal for kadm5_s_rename_principal(), and for
|
||||
* kadm5_s_modify_principal() when using principal aliases; to
|
||||
* improve this so that only one fsync() need be done
|
||||
* per-transaction will require HDB API extensions.
|
||||
*/
|
||||
krb5_error_code (*hdb__put)(krb5_context, struct HDB*, int,
|
||||
krb5_data, krb5_data);
|
||||
/**
|
||||
* Delete and hdb_entry from a classical DB backend
|
||||
*
|
||||
* Same discussion as in @ref HDB::hdb__get
|
||||
* This function takes a principal key (krb5_data) naming the record
|
||||
* to delete.
|
||||
*
|
||||
* Same discussion as in @ref HDB::hdb__put
|
||||
*/
|
||||
krb5_error_code (*hdb__del)(krb5_context, struct HDB*, krb5_data);
|
||||
/**
|
||||
@@ -263,7 +272,7 @@ typedef struct HDB {
|
||||
krb5_error_code (*hdb_check_s4u2self)(krb5_context, struct HDB *, hdb_entry_ex *, krb5_const_principal);
|
||||
}HDB;
|
||||
|
||||
#define HDB_INTERFACE_VERSION 8
|
||||
#define HDB_INTERFACE_VERSION 9
|
||||
|
||||
struct hdb_method {
|
||||
int version;
|
||||
|
Reference in New Issue
Block a user