Add private text file binary search API to libheimbase

This commit is contained in:
Nicolas Williams
2011-11-28 22:03:28 -06:00
parent aea02876e7
commit 659c761213
10 changed files with 1197 additions and 1 deletions

13
appl/dbutils/Makefile.am Normal file
View File

@@ -0,0 +1,13 @@
# $Id$
# Automake input for appl/dbutils: the bsearch command-line utility.
# Shared build settings come from the top-level common fragment.
include $(top_srcdir)/Makefile.am.common
# One program, bsearch, built from the single source file bsearch.c.
bin_PROGRAMS = bsearch
bsearch_SOURCES = bsearch.c
# Manual page for the program.
man_MANS = bsearch.1
# Extra files listed for distribution: the NTMakefile and the man page.
EXTRA_DIST = NTMakefile $(man_MANS)
# Libraries the program is linked against.
LDADD = $(LIB_roken) $(LIB_vers) $(LIB_heimbase)

35
appl/dbutils/NTMakefile Normal file
View File

@@ -0,0 +1,35 @@
########################################################################
#
# Copyright (c) 2009, Secure Endpoints Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
RELDIR=appl\dbutils
!include ../../windows/NTMakefile.w32

114
appl/dbutils/bsearch.1 Normal file
View File

@@ -0,0 +1,114 @@
.\"
.\" Copyright (c) 2011, Secure Endpoints Inc.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\"
.\" - Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\"
.\" - Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in
.\" the documentation and/or other materials provided with the
.\" distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
.\" COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
.\" OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd November 30, 2011
.Dt BSEARCH 1
.Os KTH-KRB
.Sh NAME
.Nm bsearch
.Nd binary search a sorted flat text file
.Sh SYNOPSIS
.Nm bsearch
.Op Fl KVvh
.Op Fl b Ar block-size
.Op Fl m Ar max-cache-size
.Ar file
.Op Ar key ...
.Sh DESCRIPTION
The
.Nm
program performs binary searches of
.Ar file
which must be a sorted flat text file.
.Pp
Each line is a record. Each record starts with a key
that is optionally followed by whitespace and a value.
Whitespace may be quoted with a backslash, but newline
and carriage-return characters must be quoted in some
other manner (e.g., as backslash-n and backslash-r).
Escapes are not interpreted nor removed.
.Pp
If no key arguments are given on the command-line, then
keys will be read from standard input.
.Pp
By default only values are printed to standard output.
Use the -K option to also print keys. The exit status
will be non-zero if any key lookups fail.
.Pp
Options are:
.Bl -tag -width Ds
.It Fl K
Print keys.
.It Fl V
Don't print values.
.It Fl h
Print usage and exit.
.It Fl v
Print statistics and debug information to standard
error.
.It Ar file
A sorted flat text file. NOTE: use the "C" locale for
sorting this file, as in "LC_ALL=C sort -u -o file
file".
.It Fl h
For getting a help message.
.It Fl m
Set
.Ar max-cache-size
as the maximum cache size. If the
.Ar file
is smaller than this size then the whole file will be
read into memory, else the program will read blocks.
Defaults to 1MB.
.It Fl b
Set
.Ar block-size
as the block size for block-wise I/O. This must be a
power of 2, must be no smaller than 512 and no larger
than 1MB. Defaults to the
.Ar file's
filesystem's preferred blocksize.
.El
.Sh EXAMPLES
.Bd -literal -offset indent
$ env LC_ALL=C sort -o /tmp/words /usr/share/dict/words
$ bsearch -Kv /tmp/words day
Using whole-file method
Key day found at offset 327695 in 12 loops and 0 reads
day
$
.Ed
.Sh NOTES
.Pp
Records must not be longer than one block's size.
.Pp
Flat text files must be sorted in the "C" locale. In
some systems the default locale may result in
case-insensitive sorting by the sort command.
.Sh SEE ALSO
.Xr sort 1

225
appl/dbutils/bsearch.c Normal file
View File

@@ -0,0 +1,225 @@
/*
* Copyright (c) 2011, Secure Endpoints Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <roken.h>
#include <heimbase.h>
#include <getarg.h>
#include <vers.h>
/*
 * Command-line option state.  Each variable below is the storage target
 * of one entry in the args[] table that follows.
 */
int help_flag;
int version_flag;
int verbose_flag;
int print_keys_flag;
int no_values_flag;
/* Raw integer values of -b and -m; main() validates them before use. */
int block_size_int;
int max_size_int;
/*
 * Option table handed to getarg() and arg_printusage().  Each entry gives
 * the long name, the short option letter, the argument kind, the variable
 * that receives the value, the help text and (for integer options) the
 * name shown for the option's argument.
 */
struct getargs args[] = {
{ "print-keys", 'K', arg_flag, &print_keys_flag,
"print keys", NULL },
{ "no-values", 'V', arg_flag, &no_values_flag,
"don't print values", NULL },
{ "verbose", 'v', arg_flag, &verbose_flag,
"print statistics and informative messages", NULL },
{ "help", 'h', arg_flag, &help_flag,
"print usage message", NULL },
{ "block-size", 'b', arg_integer, &block_size_int,
"block size", "integer" },
{ "max-cache-size", 'm', arg_integer, &max_size_int,
"maximum cache size", "integer" },
/* --version has no short option letter and no help text. */
{ "version", '\0', arg_flag, &version_flag, NULL, NULL }
};
/* Number of entries in args[]. */
static int num_args = sizeof(args) / sizeof(args[0]);
/*
 * Print the usage message for the program and terminate.
 *
 * The option summary is produced by arg_printusage() from the args[]
 * table; the free-form explanatory text below is passed to it as the
 * trailing argument string.
 *
 * progname: program name to show in the usage line.
 * status:   process exit status; this function does not return.
 */
static void
usage(const char *progname, int status)
{
    static const char description[] =
        "\n"
        "\tThis program does a binary search of the given file for the\n"
        "\tgiven keys. Two binary search algorithms are implemented\n"
        "\twhole-file and block-wise.\n\n"
        "\tIf keys are not given as arguments keys are read from stdin.\n\n"
        "\tExit status will be 1 for errors, 2 if any keys are not found,\n"
        "\tand 0 if all keys are found.\n\n"
        "\tOptions:\n"
        "\t\t-K \tPrint keys\n"
        "\t\t-V \tDon't print values\n"
        "\t\t-b size\tUse block-wise search with give blocksize\n"
        "\t\t-m size\tRead DB in if its size is less than given\n"
        "\t\t-v \tVerbose (includes count of reads and comparisons)\n"
        "\t\t-h \tPrint usage message and exit\n"
        "\tIf blocksize is not given, empty, or zero then the\n"
        "\tfilesystem's block size (st_blksize) will be used.\n"
        "\tBlock sizes should be powers of two, and larger than 256.\n"
        "\tIf the max file size is not given or empty then the max\n"
        "\tfile size for non-block-wise search will be 1MB.\n"
        "\tKeys from stdin must not be longer than 1023 bytes.\n\n";

    arg_printusage(args, num_args, progname, description);
    exit(status);
}
/* Largest block size (in bytes) that main() checks the -b option against. */
#define MAX_BLOCK_SIZE (1024 * 1024)
/*
 * 1MB, matching the default whole-file size limit described in the usage
 * text.  NOTE(review): not referenced in the visible code; presumably the
 * library applies its own default when max_size is 0 -- confirm.
 */
#define DEFAULT_MAX_FILE_SIZE (1024 * 1024)
/*
 * bsearch: look up keys in a sorted flat text file.
 *
 * Usage: bsearch [-KVvh] [-b block-size] [-m max-cache-size] file [key ...]
 *
 * Keys are taken from the remaining command-line arguments or, when none
 * are given, read one per line from standard input.  For each key found,
 * the value is printed by default; -K also prints the key and -V
 * suppresses the value.
 *
 * Exit status: 0 if every key was found, 1 on usage or I/O errors,
 * 2 if at least one key was not found.
 */
int
main(int argc, char **argv)
{
    char keybuf[1024];
    char *progname = argv[0];
    char *fname;
    char *key = keybuf;
    char *value;
    char *p;
    bsearch_file_handle bfh = NULL;
    size_t loc;		 /* index where record is located or to be inserted */
    size_t loops;	 /* number of loops/comparisons needed for lookup */
    size_t reads = 0;	 /* number of reads needed for a lookup */
    size_t failures = 0; /* number of lookup failures -- for exit status */
    size_t block_size = 0;
    size_t max_size = 0;
    int optidx = 0;
    int blockwise;
    int ret = 0;

    if (getarg(args, num_args, argc, argv, &optidx))
        usage(progname, 1);

    if (version_flag) {
        print_version(NULL);
        return 0;
    }

    if (help_flag)
        usage(progname, 0);

    /* Zero means "not given"; anything else (including negatives) below
     * 512 is rejected. */
    if (block_size_int != 0 && block_size_int < 512) {
        fprintf(stderr, "Invalid block size: too small\n");
        return 1;
    }
    if (block_size_int > 0) {
        /* A power of two has exactly one bit set. */
        if ((block_size_int & (block_size_int - 1)) != 0) {
            fprintf(stderr, "Invalid block size: must be power "
                    "of two\n");
            return 1;
        }
        if (block_size_int > MAX_BLOCK_SIZE) {
            fprintf(stderr, "Invalid block size: too large\n");
            return 1;
        }
        block_size = block_size_int;
    }
    if (max_size_int < 0)
        usage(progname, 1);
    max_size = max_size_int;

    /* Skip the options consumed by getarg(); it reports the index of the
     * first non-option argument through optidx, not getopt's optind. */
    argc -= optidx;
    argv += optidx;

    if (argc == 0)
        usage(progname, 1);

    fname = argv[0];
    argc--;
    argv++;

    ret = __bsearch_file_open(fname, max_size, block_size, &bfh, &reads);
    if (ret != 0) {
        perror("bsearch_file_open");
        return 1;
    }

    /* Fetch the parameters actually in effect for the verbose report. */
    __bsearch_file_info(bfh, &block_size, &max_size, &blockwise);
    if (verbose_flag && blockwise) {
        fprintf(stderr, "Using block-wise method with block size %lu and "
                "cache size %lu\n",
                (long unsigned)block_size, (long unsigned)max_size);
    } else if (verbose_flag) {
        fprintf(stderr, "Using whole-file method\n");
    }

    for (;;) {
        loops = 0; /* reset stats */
        value = NULL;

        if (argc) {
            /* Keys from the command line; argv is NULL-terminated. */
            key = *(argv++);
            if (!key)
                break;
        } else {
            /* Keys from stdin, one per line. */
            if (!fgets(keybuf, sizeof (keybuf), stdin))
                break;
            /* A line without a newline (over-long key or unterminated
             * final line) ends input processing. */
            p = strchr(key, '\n');
            if (!p)
                break;
            *p = '\0';
            if (!*key)
                continue; /* skip empty lines */
        }

        ret = __bsearch_file(bfh, key, &value, &loc, &loops, &reads);
        if (ret != 0) {
            /* Positive values are treated as error numbers; anything
             * else means the key was not found. */
            if (ret > 0) {
                fprintf(stderr, "Error: %s\n", strerror(ret));
                __bsearch_file_close(&bfh);
                return 1;
            }
            if (verbose_flag)
                fprintf(stderr, "Key %s not found in %lu loops and %lu reads; "
                        "insert at %lu\n", key, (long unsigned)loops,
                        (long unsigned)reads, (long unsigned)loc);
            failures++;
            continue;
        }

        if (verbose_flag)
            fprintf(stderr, "Key %s found at offset %lu in %lu loops and "
                    "%lu reads\n", key, (long unsigned)loc,
                    (long unsigned)loops, (long unsigned)reads);

        /* Values are printed unless -V is given; keys only with -K. */
        if (print_keys_flag && !no_values_flag && value)
            printf("%s %s\n", key, value);
        else if (print_keys_flag)
            printf("%s\n", key);
        else if (!no_values_flag && value)
            printf("%s\n", value);
        free(value);
    }

    /* Release the handle on every exit path, including lookup failures. */
    __bsearch_file_close(&bfh);
    if (failures)
        return 2;
    return 0;
}