Add private text file binary search API to libheimbase
This commit is contained in:
@@ -10,6 +10,7 @@ dir_dce = dceutils
|
|||||||
endif
|
endif
|
||||||
SUBDIRS = \
|
SUBDIRS = \
|
||||||
afsutil \
|
afsutil \
|
||||||
|
dbutils \
|
||||||
ftp \
|
ftp \
|
||||||
login \
|
login \
|
||||||
$(dir_otp) \
|
$(dir_otp) \
|
||||||
@@ -26,4 +27,4 @@ SUBDIRS = \
|
|||||||
kf \
|
kf \
|
||||||
$(dir_dce)
|
$(dir_dce)
|
||||||
|
|
||||||
EXTRA_DIST = NTMakefile
|
EXTRA_DIST = NTMakefile
|
||||||
|
13
appl/dbutils/Makefile.am
Normal file
13
appl/dbutils/Makefile.am
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# $Id$
|
||||||
|
|
||||||
|
include $(top_srcdir)/Makefile.am.common
|
||||||
|
|
||||||
|
bin_PROGRAMS = bsearch
|
||||||
|
|
||||||
|
bsearch_SOURCES = bsearch.c
|
||||||
|
|
||||||
|
man_MANS = bsearch.1
|
||||||
|
|
||||||
|
EXTRA_DIST = NTMakefile $(man_MANS)
|
||||||
|
|
||||||
|
LDADD = $(LIB_roken) $(LIB_vers) $(LIB_heimbase)
|
35
appl/dbutils/NTMakefile
Normal file
35
appl/dbutils/NTMakefile
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
########################################################################
|
||||||
|
#
|
||||||
|
# Copyright (c) 2009, Secure Endpoints Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
#
|
||||||
|
# - Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# - Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||||
|
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#
|
||||||
|
|
||||||
|
RELDIR=appl\dbutils
|
||||||
|
|
||||||
|
!include ../../windows/NTMakefile.w32
|
||||||
|
|
114
appl/dbutils/bsearch.1
Normal file
114
appl/dbutils/bsearch.1
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
.\"
|
||||||
|
.\" Copyright (c) 2011, Secure Endpoints Inc.
|
||||||
|
.\" All rights reserved.
|
||||||
|
.\"
|
||||||
|
.\" Redistribution and use in source and binary forms, with or without
|
||||||
|
.\" modification, are permitted provided that the following conditions
|
||||||
|
.\" are met:
|
||||||
|
.\"
|
||||||
|
.\" - Redistributions of source code must retain the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer.
|
||||||
|
.\"
|
||||||
|
.\" - Redistributions in binary form must reproduce the above copyright
|
||||||
|
.\" notice, this list of conditions and the following disclaimer in
|
||||||
|
.\" the documentation and/or other materials provided with the
|
||||||
|
.\" distribution.
|
||||||
|
.\"
|
||||||
|
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
.\" COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||||
|
.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||||
|
.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||||
|
.\" OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
.\"
|
||||||
|
.Dd November 30, 2011
|
||||||
|
.Dt BSEARCH 1
|
||||||
|
.Os KTH-KRB
|
||||||
|
.Sh NAME
|
||||||
|
.Nm bsearch
|
||||||
|
.Nd binary search a sorted flat text file
|
||||||
|
.Sh SYNOPSIS
|
||||||
|
.Nm bsearch
|
||||||
|
.Op Fl KVvh
|
||||||
|
.Op Fl b Ar block-size
|
||||||
|
.Op Fl m Ar max-cache-size
|
||||||
|
.Ar file
|
||||||
|
.Ar [key ...]
|
||||||
|
.Sh DESCRIPTION
|
||||||
|
The
|
||||||
|
.Nm
|
||||||
|
program performs binary searches of
|
||||||
|
.Ar file
|
||||||
|
which must be a sorted flat text file.
|
||||||
|
.Pp
|
||||||
|
Each line is a record. Each record starts with a key
|
||||||
|
that is optionally followed by whitespace and a value.
|
||||||
|
Whitespace may be quoted with a backslash, but newline
|
||||||
|
and carriage-return characters must be quoted in some
|
||||||
|
other manner (e.g., as backslash-n and backslash-r).
|
||||||
|
Escapes are not interpreted nor removed.
|
||||||
|
.Pp
|
||||||
|
If no key arguments are given on the command line, then
|
||||||
|
keys will be read from standard input.
|
||||||
|
.Pp
|
||||||
|
By default only values are printed to standard output.
|
||||||
|
Use the -K option to also print keys. The exit status
|
||||||
|
will be non-zero if any key lookups fail.
|
||||||
|
.Pp
|
||||||
|
Options are:
|
||||||
|
.Bl -tag -width Ds
|
||||||
|
.It Fl K
|
||||||
|
Print keys.
|
||||||
|
.It Fl V
|
||||||
|
Don't print values.
|
||||||
|
.It Fl h
|
||||||
|
Print usage and exit.
|
||||||
|
.It Fl v
|
||||||
|
Print statistics and debug information to standard
|
||||||
|
error.
|
||||||
|
.It Ar file
|
||||||
|
A sorted flat text file. NOTE: use the "C" locale for
|
||||||
|
sorting this file, as in "LC_ALL=C sort -u -o file
|
||||||
|
file".
|
||||||
|
.It Fl h
|
||||||
|
For getting a help message.
|
||||||
|
.It Fl m
|
||||||
|
Set
|
||||||
|
.Ar max-cache-size
|
||||||
|
as the maximum cache size. If the
|
||||||
|
.Ar file
|
||||||
|
is smaller than this size then the whole file will be
|
||||||
|
read into memory, else the program will read blocks.
|
||||||
|
Defaults to 1MB.
|
||||||
|
.It Fl b
|
||||||
|
Set
|
||||||
|
.Ar block-size
|
||||||
|
as the block size for block-wise I/O. This must be a
|
||||||
|
power of 2, must be no smaller than 512 and no larger
|
||||||
|
than 1MB. Defaults to the
|
||||||
|
.Ar file's
|
||||||
|
filesystem's preferred blocksize.
|
||||||
|
.El
|
||||||
|
.Sh EXAMPLES
|
||||||
|
.Bd -literal -offset indent
|
||||||
|
$ env LC_ALL=C sort -o /tmp/words /usr/share/dict/words
|
||||||
|
$ bsearch -Kv /tmp/words day
|
||||||
|
Using whole-file method
|
||||||
|
Key day found at offset 327695 in 12 loops and 0 reads
|
||||||
|
day
|
||||||
|
$
.Ed
|
||||||
|
.Sh NOTES
|
||||||
|
.Pp
|
||||||
|
Records must not be longer than one block's size.
|
||||||
|
.Pp
|
||||||
|
Flat text files must be sorted in the "C" locale. In
|
||||||
|
some systems the default locale may result in
|
||||||
|
case-insensitive sorting by the sort command.
|
||||||
|
.Sh SEE ALSO
|
||||||
|
.Xr sort 1
|
225
appl/dbutils/bsearch.c
Normal file
225
appl/dbutils/bsearch.c
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2011, Secure Endpoints Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||||
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||||
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||||
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <strings.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <roken.h>
|
||||||
|
#include <heimbase.h>
|
||||||
|
#include <getarg.h>
|
||||||
|
#include <vers.h>
|
||||||
|
|
||||||
|
int help_flag;
|
||||||
|
int version_flag;
|
||||||
|
int verbose_flag;
|
||||||
|
int print_keys_flag;
|
||||||
|
int no_values_flag;
|
||||||
|
int block_size_int;
|
||||||
|
int max_size_int;
|
||||||
|
|
||||||
|
struct getargs args[] = {
|
||||||
|
{ "print-keys", 'K', arg_flag, &print_keys_flag,
|
||||||
|
"print keys", NULL },
|
||||||
|
{ "no-values", 'V', arg_flag, &no_values_flag,
|
||||||
|
"don't print values", NULL },
|
||||||
|
{ "verbose", 'v', arg_flag, &verbose_flag,
|
||||||
|
"print statistics and informative messages", NULL },
|
||||||
|
{ "help", 'h', arg_flag, &help_flag,
|
||||||
|
"print usage message", NULL },
|
||||||
|
{ "block-size", 'b', arg_integer, &block_size_int,
|
||||||
|
"block size", "integer" },
|
||||||
|
{ "max-cache-size", 'm', arg_integer, &max_size_int,
|
||||||
|
"maximum cache size", "integer" },
|
||||||
|
{ "version", '\0', arg_flag, &version_flag, NULL, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
|
static int num_args = sizeof(args) / sizeof(args[0]);
|
||||||
|
|
||||||
|
static void
|
||||||
|
usage(const char *progname, int status)
|
||||||
|
{
|
||||||
|
arg_printusage(args, num_args, progname, "\n"
|
||||||
|
"\tThis program does a binary search of the given file for the\n"
|
||||||
|
"\tgiven keys. Two binary search algorithms are implemented\n"
|
||||||
|
"\twhole-file and block-wise.\n\n"
|
||||||
|
"\tIf keys are not given as arguments keys are read from stdin.\n\n"
|
||||||
|
"\tExit status will be 1 for errors, 2 if any keys are not found,\n"
|
||||||
|
"\tand 0 if all keys are found.\n\n"
|
||||||
|
"\tOptions:\n"
|
||||||
|
"\t\t-K \tPrint keys\n"
|
||||||
|
"\t\t-V \tDon't print values\n"
|
||||||
|
"\t\t-b size\tUse block-wise search with give blocksize\n"
|
||||||
|
"\t\t-m size\tRead DB in if its size is less than given\n"
|
||||||
|
"\t\t-v \tVerbose (includes count of reads and comparisons)\n"
|
||||||
|
"\t\t-h \tPrint usage message and exit\n"
|
||||||
|
"\tIf blocksize is not given, empty, or zero then the\n"
|
||||||
|
"\tfilesystem's block size (st_blksize) will be used.\n"
|
||||||
|
"\tBlock sizes should be powers of two, and larger than 256.\n"
|
||||||
|
"\tIf the max file size is not given or empty then the max\n"
|
||||||
|
"\tfile size for non-block-wise search will be 1MB.\n"
|
||||||
|
"\tKeys from stdin must not be longer than 1023 bytes.\n\n"
|
||||||
|
);
|
||||||
|
exit(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MAX_BLOCK_SIZE (1024 * 1024)
|
||||||
|
#define DEFAULT_MAX_FILE_SIZE (1024 * 1024)
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
char keybuf[1024];
|
||||||
|
char *progname = argv[0];
|
||||||
|
char *fname;
|
||||||
|
char *key = keybuf;
|
||||||
|
char *value;
|
||||||
|
char *p;
|
||||||
|
bsearch_file_handle bfh = NULL;
|
||||||
|
size_t num;
|
||||||
|
size_t loc; /* index where record is located or to be inserted */
|
||||||
|
size_t loops; /* number of loops/comparisons needed for lookup */
|
||||||
|
size_t reads = 0; /* number of reads needed for a lookup */
|
||||||
|
size_t failures = 0; /* number of lookup failures -- for exit status */
|
||||||
|
size_t block_size = 0;
|
||||||
|
size_t max_size = 0;
|
||||||
|
int optidx = 0;
|
||||||
|
int blockwise;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (getarg(args, num_args, argc, argv, &optidx))
|
||||||
|
usage(progname, 1);
|
||||||
|
|
||||||
|
if (version_flag) {
|
||||||
|
print_version(NULL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (help_flag)
|
||||||
|
usage(progname, 0);
|
||||||
|
|
||||||
|
if (block_size_int != 0 && block_size_int < 512) {
|
||||||
|
fprintf(stderr, "Invalid block size: too small\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (block_size_int > 0) {
|
||||||
|
/* Check that block_size is a power of 2 */
|
||||||
|
num = block_size_int;
|
||||||
|
while (num) {
|
||||||
|
if ((num % 2) && (num >> 1)) {
|
||||||
|
fprintf(stderr, "Invalid block size: must be power "
|
||||||
|
"of two\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
num >>= 1;
|
||||||
|
}
|
||||||
|
if (block_size_int > MAX_BLOCK_SIZE)
|
||||||
|
fprintf(stderr, "Invalid block size: too large\n");
|
||||||
|
block_size = block_size_int;
|
||||||
|
}
|
||||||
|
if (max_size_int < 0)
|
||||||
|
usage(progname, 1);
|
||||||
|
max_size = max_size_int;
|
||||||
|
|
||||||
|
argc -= optind;
|
||||||
|
argv += optind;
|
||||||
|
|
||||||
|
if (argc == 0)
|
||||||
|
usage(progname, 1);
|
||||||
|
|
||||||
|
fname = argv[0];
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
|
||||||
|
ret = __bsearch_file_open(fname, max_size, block_size, &bfh, &reads);
|
||||||
|
if (ret != 0) {
|
||||||
|
perror("bsearch_file_open");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
__bsearch_file_info(bfh, &block_size, &max_size, &blockwise);
|
||||||
|
if (verbose_flag && blockwise) {
|
||||||
|
fprintf(stderr, "Using block-wise method with block size %lu and "
|
||||||
|
"cache size %lu\n",
|
||||||
|
(long unsigned)block_size, (long unsigned)max_size);
|
||||||
|
} else if (verbose_flag) {
|
||||||
|
fprintf(stderr, "Using whole-file method\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
loops = 0; /* reset stats */
|
||||||
|
/* Eww */
|
||||||
|
if (argc) {
|
||||||
|
key = *(argv++);
|
||||||
|
if (!key)
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
if (!fgets(keybuf, sizeof (keybuf), stdin))
|
||||||
|
break;
|
||||||
|
p = strchr(key, '\n');
|
||||||
|
if (!p)
|
||||||
|
break;
|
||||||
|
*p = '\0';
|
||||||
|
if (!*key)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
ret = __bsearch_file(bfh, key, &value, &loc, &loops, &reads);
|
||||||
|
if (ret != 0) {
|
||||||
|
if (ret > 0) {
|
||||||
|
fprintf(stderr, "Error: %s\n", strerror(ret));
|
||||||
|
__bsearch_file_close(&bfh);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (verbose_flag)
|
||||||
|
fprintf(stderr, "Key %s not found in %lu loops and %lu reads; "
|
||||||
|
"insert at %lu\n", key, (long unsigned)loops,
|
||||||
|
(long unsigned)reads, (long unsigned)loc);
|
||||||
|
failures++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (verbose_flag)
|
||||||
|
fprintf(stderr, "Key %s found at offset %lu in %lu loops and "
|
||||||
|
"%lu reads\n", key, (long unsigned)loc,
|
||||||
|
(long unsigned)loops, (long unsigned)reads);
|
||||||
|
if (print_keys_flag && !no_values_flag && value)
|
||||||
|
printf("%s %s\n", key, value);
|
||||||
|
else if (print_keys_flag)
|
||||||
|
printf("%s\n", key);
|
||||||
|
else if (no_values_flag && value)
|
||||||
|
printf("%s\n", value);
|
||||||
|
free(value);
|
||||||
|
}
|
||||||
|
if (failures)
|
||||||
|
return 2;
|
||||||
|
__bsearch_file_close(&bfh);
|
||||||
|
return 0;
|
||||||
|
}
|
@@ -17,6 +17,7 @@ include_HEADERS = heimbase.h
|
|||||||
dist_libheimbase_la_SOURCES = \
|
dist_libheimbase_la_SOURCES = \
|
||||||
array.c \
|
array.c \
|
||||||
baselocl.h \
|
baselocl.h \
|
||||||
|
bsearch.c \
|
||||||
bool.c \
|
bool.c \
|
||||||
data.c \
|
data.c \
|
||||||
dict.c \
|
dict.c \
|
||||||
|
786
base/bsearch.c
Normal file
786
base/bsearch.c
Normal file
@@ -0,0 +1,786 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2011, Secure Endpoints Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||||
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||||
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||||
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "baselocl.h"
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <strings.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <getopt.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file contains functions for binary searching flat text in memory
|
||||||
|
* and in text files where each line is a [variable length] record.
|
||||||
|
* Each record has a key and an optional value separated from the key by
|
||||||
|
* unquoted whitespace. Whitespace in the key, and leading whitespace
|
||||||
|
* for the value, can be quoted with backslashes (but CR and LF must be
|
||||||
|
* quoted in such a way that they don't appear in the quoted result).
|
||||||
|
*
|
||||||
|
* Binary searching a tree is normally a dead simple algorithm. It
|
||||||
|
* turns out that binary searching flat text with *variable* length
|
||||||
|
* records is... tricky. There are no indexes to record beginning bytes,
|
||||||
|
* thus any index selected during the search is likely to fall in the
|
||||||
|
* middle of a record. When deciding to search a left sub-tree one
|
||||||
|
* might fail to find the last record in that sub-tree on account of the
|
||||||
|
* right boundary falling in the middle of it -- the chosen solution to
|
||||||
|
* this makes left sub-tree searches slightly less efficient than right
|
||||||
|
* sub-tree searches.
|
||||||
|
*
|
||||||
|
* If binary searching flat text in memory is tricky, using block-wise
|
||||||
|
* I/O instead is trickier! But it's necessary in order to support
|
||||||
|
* large files (which we either can't or wouldn't want to read or map
|
||||||
|
* into memory). Each block we read has to be large enough that the
|
||||||
|
* largest record can fit in it. And each block might start and/or end
|
||||||
|
* in the middle of a record. Here it is the right sub-tree searches
|
||||||
|
* that are less efficient than left sub-tree searches.
|
||||||
|
*
|
||||||
|
* bsearch_common() contains the common text block binary search code.
|
||||||
|
*
|
||||||
|
* __bsearch_text() is the interface for searching in-core text.
|
||||||
|
* __bsearch_file() is the interface for block-wise searching files.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * State kept for one file opened for binary searching.
 */
struct bsearch_file_handle {
    int fd;             /* descriptor of the open file */
    char *cache;        /* buffer holding cached bytes */
    char *page;         /* buffer for one double-size page of bytes */
    size_t file_sz;     /* size of the file */
    size_t cache_sz;    /* size of the cache */
    size_t page_sz;     /* size of a page */
};
|
||||||
|
|
||||||
|
/*
 * Find the start of the line that follows position i.
 *
 * Index 0 is taken to be the start of a line and is returned as-is.
 * Otherwise the first '\n' in buf[i..right) is located and a pointer to
 * the byte after it is returned.  Returns NULL when there is no such
 * newline or when the byte after it would be at or beyond right.
 */
static const char *
find_line(const char *buf, size_t i, size_t right)
{
    const char *nl;

    if (i == 0)
        return buf;
    if (i >= right)
        return NULL;

    nl = memchr(buf + i, '\n', right - i);
    if (nl == NULL)
        return NULL;

    /* The next line must start inside [0, right). */
    if ((size_t)(nl - buf) + 1 >= right)
        return NULL;
    return nl + 1;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Common routine for binary searching text in core.
|
||||||
|
*
|
||||||
|
* Perform a binary search of a char array containing a block from a
|
||||||
|
* text file where each line is a record (LF and CRLF supported). Each
|
||||||
|
* record consists of a key followed by an optional value separated from
|
||||||
|
* the key by whitespace. Whitespace can be quoted with backslashes.
|
||||||
|
* It's the caller's responsibility to encode/decode keys/values if
|
||||||
|
* quoting is desired; newlines should be encoded such that a newline
|
||||||
|
* does not appear in the result.
|
||||||
|
*
|
||||||
|
* All output arguments are optional.
|
||||||
|
*
|
||||||
|
* Returns 0 if key is found, -1 if not found, or an error code such as
|
||||||
|
* ENOMEM in case of error.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @buf String to search
|
||||||
|
* @sz Size of string to search
|
||||||
|
* @key Key string to search for
|
||||||
|
* @buf_is_start True if the buffer starts with a record, false if it
|
||||||
|
* starts in the middle of a record or if the caller
|
||||||
|
* doesn't know.
|
||||||
|
*
|
||||||
|
* Outputs:
|
||||||
|
*
|
||||||
|
* @value Location to store a copy of the value (caller must free)
|
||||||
|
* @location Record location if found else the location where the
|
||||||
|
* record should be inserted (index into @buf)
|
||||||
|
* @cmp Set to less than or greater than 0 to indicate that a
|
||||||
|
* key not found would have fit in an earlier or later
|
||||||
|
* part of a file. Callers should use this to decide
|
||||||
|
* whether to read a block to the left or to the right and
|
||||||
|
* search that.
|
||||||
|
* @loops Location to store a count of bisections required for
|
||||||
|
* search (useful for confirming logarithmic performance)
|
||||||
|
*/
|
||||||
|
static int
bsearch_common(const char *buf, size_t sz, const char *key,
               int buf_is_start, char **value, size_t *location,
               int *cmp, size_t *loops)
{
    const char *linep;
    size_t key_start, key_len;  /* key string in buf */
    size_t val_start, val_len;  /* value string in buf */
    /*
     * Result of the most recent comparison.  Starts at -1 so that *cmp is
     * well defined even when the loop ends before any comparison is made
     * (for example when sz is 0).
     */
    int key_cmp = -1;
    size_t k;
    size_t l;       /* left side of buffer for binary search */
    size_t r;       /* right side of buffer for binary search */
    size_t rmax;    /* upper bound for scanning; shrinks on left searches */
    size_t i;       /* index into buffer, typically in the middle of l and r */
    size_t loop_count = 0;
    int ret = -1;

    if (value)
        *value = NULL;
    if (cmp)
        *cmp = 0;
    if (loops)
        *loops = 0;

    /* Binary search; file should be sorted */
    for (l = 0, r = rmax = sz, i = sz >> 1; i >= l && i < rmax; loop_count++) {
        heim_assert(i < sz, "invalid aname2lname db index");

        /* buf[i] is likely in the middle of a line; find the next line */
        linep = find_line(buf, i, rmax);
        k = linep ? (size_t)(linep - buf) : i;
        if (linep == NULL || k >= rmax) {
            /*
             * No new line found to the right; search to the left then
             * but don't change rmax (this isn't optimal, but it's
             * simple).
             */
            if (i == l)
                break;
            r = i;
            i = l + ((r - l) >> 1);
            continue;
        }
        i = k;
        heim_assert(i >= l && i < rmax, "invalid aname2lname db index");

        /* Got a line; check it */

        /*
         * Split the record on the first run of unquoted whitespace.  On
         * exit from this loop k is the index of the record's line ending
         * ('\r' or '\n'), or is at/after rmax (or on a NUL) if the record
         * is not terminated within the buffer.
         */
        key_start = i;
        key_len = 0;
        val_start = 0;
        val_len = 0;
        for (k = i; k < rmax; k++) {
            if (buf[k] == '\\') {
                k++;    /* skip the quoted character */
                continue;
            }
            if (buf[k] == '\r' || buf[k] == '\n') {
                /* We now know where the key ends, and there's no value */
                key_len = k - i;
                break;
            }
            /* <ctype.h> functions require an unsigned char value */
            if (!isspace((unsigned char)buf[k]))
                continue;

            /* The key ends at the first separator character. */
            key_len = k - i;

            /* Skip the separator, but never run into the next record. */
            while (k < rmax && buf[k] != '\r' && buf[k] != '\n' &&
                   isspace((unsigned char)buf[k]))
                k++;
            if (k < rmax)
                val_start = k;
            /* Find end of value */
            for (; k < rmax && buf[k] != '\0'; k++) {
                if (buf[k] == '\r' || buf[k] == '\n') {
                    val_len = k - val_start;
                    break;
                }
            }
            break;
        }

        /*
         * The following logic is for dealing with partial buffers,
         * which we use for block-wise binary searches of large files
         */
        if (key_start == 0 && !buf_is_start) {
            /*
             * We're at the beginning of a block that might have started
             * in the middle of a record whose "key" might well compare
             * as greater than the key we're looking for, so we don't
             * bother comparing -- we know key_cmp must be -1 here.
             */
            key_cmp = -1;
            break;
        }
        if (k >= rmax ||
            (buf[k] != '\n' &&
             !(buf[k] == '\r' && k + 1 < rmax && buf[k + 1] == '\n'))) {
            /*
             * The record is not terminated by LF or CRLF inside the
             * buffer: we're at the end of a block that ends in the middle
             * of a record whose "key" might well compare as less than the
             * key we're looking for, so we don't bother comparing -- we
             * know key_cmp must be >= 0 but we can't tell.  Our caller
             * will end up reading a double-size block to handle this.
             */
            key_cmp = 1;
            break;
        }

        key_cmp = strncmp(key, &buf[key_start], key_len);
        if (key_cmp == 0 && strlen(key) != key_len)
            key_cmp = 1;    /* record's key is a proper prefix of key */
        if (key_cmp < 0) {
            /* search left */
            r = rmax = (size_t)(linep - buf);
            i = l + ((r - l) >> 1);
            if (location)
                *location = key_start;
        } else if (key_cmp > 0) {
            /* search right */
            if (l == i)
                break; /* not found */
            l = i;
            i = l + ((r - l) >> 1);
            /*
             * Insert after this record.  k is the index of its line
             * ending, which equals val_start + val_len when the record
             * has a value and is also valid when it has none.
             */
            if (location)
                *location = k;
        } else {
            /* match! */
            if (location)
                *location = key_start;
            ret = 0;
            if (val_len && value) {
                *value = strndup(&buf[val_start], val_len);
                if (!*value)
                    ret = errno ? errno : ENOMEM;
            }
            break;
        }
    }

    if (cmp)
        *cmp = key_cmp;
    if (loops)
        *loops = loop_count;

    return ret;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Binary search a char array containing sorted text records separated
|
||||||
|
* by new-lines (or CRLF). Each record consists of a key and an
|
||||||
|
* optional value following the key, separated from the key by unquoted
|
||||||
|
* whitespace.
|
||||||
|
*
|
||||||
|
* All output arguments are optional.
|
||||||
|
*
|
||||||
|
* Returns 0 if key is found, -1 if not found, or an error code such as
|
||||||
|
* ENOMEM in case of error.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @buf Char array pointer
|
||||||
|
* @buf_sz Size of buf
|
||||||
|
* @key Key to search for
|
||||||
|
*
|
||||||
|
* Outputs:
|
||||||
|
*
|
||||||
|
* @value Location where to put the value, if any (caller must free)
|
||||||
|
* @location Record location if found else the location where the record
|
||||||
|
* should be inserted (index into @buf)
|
||||||
|
* @loops Location where to put a number of loops (or comparisons)
|
||||||
|
* needed for the search (useful for benchmarking)
|
||||||
|
*/
|
||||||
|
int
__bsearch_text(const char *buf, size_t buf_sz, const char *key,
               char **value, size_t *location, size_t *loops)
{
    /*
     * Tell the common routine that buf begins with a record, and pass
     * NULL for the comparison-direction output, which this interface
     * does not expose.
     */
    const int starts_with_record = 1;

    return bsearch_common(buf, buf_sz, key, starts_with_record,
                          value, location, NULL, loops);
}
|
||||||
|
|
||||||
|
#define MAX_BLOCK_SIZE (1024 * 1024)
|
||||||
|
#define DEFAULT_MAX_FILE_SIZE (1024 * 1024)
|
||||||
|
/**
|
||||||
|
* Open a file for binary searching. The file will be read in entirely
|
||||||
|
* if it is smaller than @max_sz, else a cache of @max_sz bytes will be
|
||||||
|
* allocated.
|
||||||
|
*
|
||||||
|
* Returns 0 on success, else an error number or -1 if the file is empty.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @fname Name of file to open
|
||||||
|
* @max_sz Maximum size of cache to allocate, in bytes (if zero, default)
|
||||||
|
* @page_sz Page size (must be a power of two, larger than 256, smaller
|
||||||
|
* than 1MB; if zero use default)
|
||||||
|
*
|
||||||
|
* Outputs:
|
||||||
|
*
|
||||||
|
* @bfh Handle for use with __bsearch_file() and __bsearch_file_close()
|
||||||
|
* @reads Number of reads performed
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
__bsearch_file_open(const char *fname, size_t max_sz, size_t page_sz,
|
||||||
|
bsearch_file_handle *bfh, size_t *reads)
|
||||||
|
{
|
||||||
|
bsearch_file_handle new_bfh;
|
||||||
|
struct stat st;
|
||||||
|
size_t i;
|
||||||
|
int fd;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
*bfh = NULL;
|
||||||
|
|
||||||
|
if (reads)
|
||||||
|
*reads = 0;
|
||||||
|
|
||||||
|
fd = open(fname, O_RDONLY);
|
||||||
|
if (fd == -1)
|
||||||
|
return errno;
|
||||||
|
|
||||||
|
if (fstat(fd, &st) == -1) {
|
||||||
|
ret = errno;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (st.st_size == 0) {
|
||||||
|
ret = -1; /* no data -> no binary search */
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate / default arguments */
|
||||||
|
if (max_sz == 0)
|
||||||
|
max_sz = DEFAULT_MAX_FILE_SIZE;
|
||||||
|
for (i = page_sz; i; i >>= 1) {
|
||||||
|
/* Make sure page_sz is a power of two */
|
||||||
|
if ((i % 2) && (i >> 1)) {
|
||||||
|
page_sz = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (page_sz == 0)
|
||||||
|
page_sz = st.st_blksize;
|
||||||
|
for (i = page_sz; i; i >>= 1) {
|
||||||
|
/* Make sure page_sz is a power of two */
|
||||||
|
if ((i % 2) && (i >> 1)) {
|
||||||
|
/* Can't happen! Filesystems always use powers of two! */
|
||||||
|
page_sz = 4096;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (page_sz > MAX_BLOCK_SIZE)
|
||||||
|
page_sz = MAX_BLOCK_SIZE;
|
||||||
|
|
||||||
|
new_bfh = calloc(1, sizeof (*new_bfh));
|
||||||
|
if (new_bfh == NULL) {
|
||||||
|
ret = ENOMEM;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_bfh->fd = fd;
|
||||||
|
new_bfh->page_sz = page_sz;
|
||||||
|
new_bfh->file_sz = st.st_size;
|
||||||
|
|
||||||
|
if (max_sz >= st.st_size) {
|
||||||
|
/* Whole-file method */
|
||||||
|
new_bfh->cache = malloc(st.st_size + 1);
|
||||||
|
if (new_bfh->cache) {
|
||||||
|
new_bfh->cache[st.st_size] = '\0';
|
||||||
|
new_bfh->cache_sz = st.st_size;
|
||||||
|
ret = read(fd, new_bfh->cache, st.st_size);
|
||||||
|
if (ret < 0) {
|
||||||
|
ret = errno;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
if (ret != st.st_size) {
|
||||||
|
ret = EIO; /* XXX ??? */
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
if (reads)
|
||||||
|
*reads = 1;
|
||||||
|
(void) close(fd);
|
||||||
|
new_bfh->fd = -1;
|
||||||
|
*bfh = new_bfh;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Block-size method, or above malloc() failed */
|
||||||
|
new_bfh->page = malloc(new_bfh->page_sz << 1);
|
||||||
|
if (new_bfh->page == NULL) {
|
||||||
|
/* Can't even allocate a single double-size page! */
|
||||||
|
ret = ENOMEM;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_bfh->cache_sz = max_sz < st.st_size ? max_sz : st.st_size;
|
||||||
|
new_bfh->cache = malloc(new_bfh->cache_sz);
|
||||||
|
*bfh = new_bfh;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* malloc() may have failed because we were asking for a lot of
|
||||||
|
* memory, but we may still be able to operate without a cache,
|
||||||
|
* so let's not fail.
|
||||||
|
*/
|
||||||
|
if (new_bfh->cache == NULL) {
|
||||||
|
new_bfh->cache_sz = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize cache */
|
||||||
|
for (i = 0; i < new_bfh->cache_sz; i += new_bfh->page_sz)
|
||||||
|
new_bfh->cache[i] = '\0';
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err:
|
||||||
|
(void) close(fd);
|
||||||
|
if (new_bfh) {
|
||||||
|
free(new_bfh->page);
|
||||||
|
free(new_bfh->cache);
|
||||||
|
free(new_bfh);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicate whether the given binary search file handle will be searched
|
||||||
|
* with block-wise method.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
__bsearch_file_info(bsearch_file_handle bfh,
|
||||||
|
size_t *page_sz, size_t *max_sz, int *blockwise)
|
||||||
|
{
|
||||||
|
if (page_sz)
|
||||||
|
*page_sz = bfh->page_sz;
|
||||||
|
if (max_sz)
|
||||||
|
*max_sz = bfh->cache_sz;
|
||||||
|
if (blockwise)
|
||||||
|
*blockwise = (bfh->file_sz != bfh->cache_sz);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the given binary file search handle.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @bfh Pointer to variable containing handle to close.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
__bsearch_file_close(bsearch_file_handle *bfh)
|
||||||
|
{
|
||||||
|
if (!*bfh)
|
||||||
|
return;
|
||||||
|
if ((*bfh)->fd >= 0)
|
||||||
|
(void) close((*bfh)->fd);
|
||||||
|
if ((*bfh)->page)
|
||||||
|
free((*bfh)->page);
|
||||||
|
if ((*bfh)->cache)
|
||||||
|
free((*bfh)->cache);
|
||||||
|
free(*bfh);
|
||||||
|
*bfh = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private function to get a page from a cache. The cache is a char
|
||||||
|
* array of 2^n - 1 double-size page worth of bytes, where n is the
|
||||||
|
* number of tree levels that the cache stores. The cache can be
|
||||||
|
* smaller than n implies.
|
||||||
|
*
|
||||||
|
* The page may or may not be valid. If the first byte of it is NUL
|
||||||
|
* then it's not valid, else it is.
|
||||||
|
*
|
||||||
|
* Returns 1 if page is in cache and valid, 0 if the cache is too small
|
||||||
|
* or the page is invalid. The page address is output in @buf if the
|
||||||
|
* cache is large enough to contain it regardless of whether the page is
|
||||||
|
* valid.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @bfh Binary search file handle
|
||||||
|
* @level Level in the tree that we want a page for
|
||||||
|
* @page_idx Page number in the given level (0..2^level - 1)
|
||||||
|
*
|
||||||
|
* Outputs:
|
||||||
|
*
|
||||||
|
* @buf Set to address of page if the cache is large enough
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
get_page_from_cache(bsearch_file_handle bfh, size_t level, size_t page_idx,
|
||||||
|
char **buf)
|
||||||
|
{
|
||||||
|
size_t idx = 0;
|
||||||
|
size_t page_sz;
|
||||||
|
|
||||||
|
page_sz = bfh->page_sz << 1; /* we use double-size pages in the cache */
|
||||||
|
|
||||||
|
*buf = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute index into cache. The cache is basically an array of
|
||||||
|
* double-size pages. The first (zeroth) double-size page in the
|
||||||
|
* cache will be the middle page of the file -- the root of the
|
||||||
|
* tree. The next two double-size pages will be the left and right
|
||||||
|
* pages of the second level in the tree. The next four double-size
|
||||||
|
* pages will be the four pages at the next level. And so on for as
|
||||||
|
* many pages as fit in the cache.
|
||||||
|
*
|
||||||
|
* The page index is the number of the page at the given level. We
|
||||||
|
* then compute (2^level - 1 + page index) * 2page size, check that
|
||||||
|
* we have that in the cache, check that the page has been read (it
|
||||||
|
* doesn't start with NUL).
|
||||||
|
*/
|
||||||
|
if (level)
|
||||||
|
idx = (1 << level) - 1 + page_idx;
|
||||||
|
if (((idx + 1) * page_sz * 2) > bfh->cache_sz)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
*buf = &bfh->cache[idx * page_sz * 2];
|
||||||
|
if (bfh->cache[idx * page_sz * 2] == '\0')
|
||||||
|
return 0; /* cache[idx] == NUL -> page not loaded in cache */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private function to read a page of @page_sz from @fd at offset @off
|
||||||
|
* into @buf, outputing the number of bytes read, which will be the same
|
||||||
|
* as @page_sz unless the page being read is the last page, in which
|
||||||
|
* case the number of remaining bytes in the file will be output.
|
||||||
|
*
|
||||||
|
* Returns 0 on success or an errno value otherwise (EIO if reads are
|
||||||
|
* short).
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @bfh Binary search file handle
|
||||||
|
* @level Level in the binary search tree that we're at
|
||||||
|
* @page_idx Page "index" at the @level of the tree that we want
|
||||||
|
* @page Actual page number that we want
|
||||||
|
* want_double Whether we need a page or double page read
|
||||||
|
*
|
||||||
|
* Outputs:
|
||||||
|
*
|
||||||
|
* @buf Page read or cached
|
||||||
|
* @bytes Bytes read (may be less than page or double page size in
|
||||||
|
* the case of the last page, of course)
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
read_page(bsearch_file_handle bfh, size_t level, size_t page_idx, size_t page,
|
||||||
|
int want_double, const char **buf, size_t *bytes)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
off_t off;
|
||||||
|
size_t expected;
|
||||||
|
size_t wanted;
|
||||||
|
char *page_buf;
|
||||||
|
|
||||||
|
/* Figure out where we're reading and how much */
|
||||||
|
off = page * bfh->page_sz;
|
||||||
|
if (off < 0)
|
||||||
|
return EOVERFLOW;
|
||||||
|
|
||||||
|
wanted = bfh->page_sz << want_double;
|
||||||
|
expected = ((bfh->file_sz - off) > wanted) ? wanted : bfh->file_sz - off;
|
||||||
|
|
||||||
|
if (get_page_from_cache(bfh, level, page_idx, &page_buf)) {
|
||||||
|
*buf = page_buf;
|
||||||
|
*bytes = expected;
|
||||||
|
return 0; /* found in cache */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
*bytes = 0;
|
||||||
|
*buf = NULL;
|
||||||
|
|
||||||
|
/* OK, we have to read a page or double-size page */
|
||||||
|
|
||||||
|
if (page_buf)
|
||||||
|
want_double = 1; /* we'll be caching; we cache double-size pages */
|
||||||
|
else
|
||||||
|
page_buf = bfh->page; /* we won't cache this page */
|
||||||
|
|
||||||
|
wanted = bfh->page_sz << want_double;
|
||||||
|
expected = ((bfh->file_sz - off) > wanted) ? wanted : bfh->file_sz - off;
|
||||||
|
|
||||||
|
#ifdef HAVE_PREAD
|
||||||
|
ret = pread(bfh->fd, page_buf, expected, off);
|
||||||
|
#else
|
||||||
|
if (lseek(bfh->fd, off, SEEK_SET) == (off_t)-1)
|
||||||
|
return errno;
|
||||||
|
ret = read(bfh->fd, page_buf, expected);
|
||||||
|
#endif
|
||||||
|
if (ret < 0)
|
||||||
|
return errno;
|
||||||
|
|
||||||
|
if (ret != expected)
|
||||||
|
return EIO; /* XXX ??? */
|
||||||
|
|
||||||
|
*buf = page_buf;
|
||||||
|
*bytes = expected;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform a binary search of a file where each line is a record (LF and
|
||||||
|
* CRLF supported). Each record consists of a key followed by an
|
||||||
|
* optional value separated from the key by whitespace. Whitespace can
|
||||||
|
* be quoted with backslashes. It's the caller's responsibility to
|
||||||
|
* encode/decode keys/values if quoting is desired; newlines should be
|
||||||
|
* encoded such that a newline does not appear in the result.
|
||||||
|
*
|
||||||
|
* The search is done with block-wise I/O (i.e., the whole file is not
|
||||||
|
* read into memory).
|
||||||
|
*
|
||||||
|
* All output arguments are optional.
|
||||||
|
*
|
||||||
|
* Returns 0 if key is found, -1 if not found, or an error code such as
|
||||||
|
* ENOMEM in case of error.
|
||||||
|
*
|
||||||
|
* NOTE: We could improve this by not freeing the buffer, instead
|
||||||
|
* requiring that the caller provide it. Further, we could cache
|
||||||
|
* the top N levels of [double-size] pages (2^N - 1 pages), which
|
||||||
|
* should speed up most searches by reducing the number of reads
|
||||||
|
* by N.
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
*
|
||||||
|
* @fd File descriptor (file to search)
|
||||||
|
* @page_sz Page size (if zero then the file's st_blksize will be used)
|
||||||
|
* @key Key string to search for
|
||||||
|
*
|
||||||
|
* Outputs:
|
||||||
|
*
|
||||||
|
* @value Location to store a copy of the value (caller must free)
|
||||||
|
* @location Record location if found else the location where the
|
||||||
|
* record should be inserted (index into @buf)
|
||||||
|
* @loops Location to store a count of bisections required for
|
||||||
|
* search (useful for confirming logarithmic performance)
|
||||||
|
* @reads Location to store a count of pages read during search
|
||||||
|
* (useful for confirming logarithmic performance)
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
__bsearch_file(bsearch_file_handle bfh, const char *key,
|
||||||
|
char **value, size_t *location, size_t *loops, size_t *reads)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
const char *buf;
|
||||||
|
size_t buf_sz;
|
||||||
|
size_t page, l, r;
|
||||||
|
size_t my_reads = 0;
|
||||||
|
size_t my_loops_total = 0;
|
||||||
|
size_t my_loops;
|
||||||
|
size_t level; /* level in the tree */
|
||||||
|
size_t page_idx = 0; /* page number in the tree level */
|
||||||
|
size_t buf_location;
|
||||||
|
int cmp;
|
||||||
|
int buf_ends_in_eol = 0;
|
||||||
|
int buf_is_start = 0;
|
||||||
|
|
||||||
|
if (reads)
|
||||||
|
*reads = 0;
|
||||||
|
|
||||||
|
/* If whole file is in memory then search that and we're done */
|
||||||
|
if (bfh->file_sz == bfh->cache_sz)
|
||||||
|
return __bsearch_text(bfh->cache, bfh->cache_sz, key, value, location, loops);
|
||||||
|
|
||||||
|
/* Else block-wise binary search */
|
||||||
|
|
||||||
|
if (value)
|
||||||
|
*value = NULL;
|
||||||
|
if (loops)
|
||||||
|
*loops = 0;
|
||||||
|
|
||||||
|
l = 0;
|
||||||
|
r = (bfh->file_sz / bfh->page_sz) + 1;
|
||||||
|
for (level = 0, page = r >> 1; page >= l && page < r ; level++) {
|
||||||
|
ret = read_page(bfh, level, page_idx, page, 0, &buf, &buf_sz);
|
||||||
|
if (ret != 0)
|
||||||
|
return ret;
|
||||||
|
my_reads++;
|
||||||
|
if (buf[buf_sz - 1] == '\r' || buf[buf_sz - 1] == '\n')
|
||||||
|
buf_ends_in_eol = 1;
|
||||||
|
else
|
||||||
|
buf_ends_in_eol = 0;
|
||||||
|
|
||||||
|
buf_is_start = page == 0 ? 1 : 0;
|
||||||
|
ret = bsearch_common(buf, (size_t)buf_sz, key, buf_is_start,
|
||||||
|
value, &buf_location, &cmp, &my_loops);
|
||||||
|
if (ret > 0)
|
||||||
|
return ret;
|
||||||
|
/* Found or no we update stats */
|
||||||
|
my_loops_total += my_loops;
|
||||||
|
if (loops)
|
||||||
|
*loops = my_loops_total;
|
||||||
|
if (reads)
|
||||||
|
*reads = my_reads;
|
||||||
|
if (location)
|
||||||
|
*location = page * bfh->page_sz + buf_location;
|
||||||
|
if (ret == 0)
|
||||||
|
return 0; /* found! */
|
||||||
|
/* Not found */
|
||||||
|
if (cmp < 0) {
|
||||||
|
/* Search left */
|
||||||
|
page_idx <<= 1;
|
||||||
|
r = page;
|
||||||
|
page = l + ((r - l) >> 1);
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Search right, but first search the current and next
|
||||||
|
* blocks in case that the record we're looking for either
|
||||||
|
* straddles the boundary between this and the next record,
|
||||||
|
* or in case the record starts exactly at the next page.
|
||||||
|
*/
|
||||||
|
heim_assert(cmp > 0, "cmp > 0");
|
||||||
|
|
||||||
|
if (!buf_ends_in_eol || page == l || page == (r - 1)) {
|
||||||
|
ret = read_page(bfh, level, page_idx, page, 1, &buf, &buf_sz);
|
||||||
|
if (ret != 0)
|
||||||
|
return ret;
|
||||||
|
my_reads++;
|
||||||
|
|
||||||
|
buf_is_start = page == l ? 1 : 0;
|
||||||
|
|
||||||
|
ret = bsearch_common(buf, (size_t)buf_sz, key, buf_is_start,
|
||||||
|
value, &buf_location, &cmp, &my_loops);
|
||||||
|
if (ret > 0)
|
||||||
|
return ret;
|
||||||
|
my_loops_total += my_loops;
|
||||||
|
if (loops)
|
||||||
|
*loops = my_loops_total;
|
||||||
|
if (reads)
|
||||||
|
*reads = my_reads;
|
||||||
|
if (location)
|
||||||
|
*location = page * bfh->page_sz + buf_location;
|
||||||
|
if (ret == 0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Oh well, search right */
|
||||||
|
if (l == page && r == (l + 1))
|
||||||
|
break;
|
||||||
|
page_idx = (page_idx << 1) + 1;
|
||||||
|
l = page;
|
||||||
|
page = l + ((r - l) >> 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
@@ -235,5 +235,20 @@ const void * heim_data_get_ptr(heim_data_t);
|
|||||||
size_t heim_data_get_length(heim_data_t);
|
size_t heim_data_get_length(heim_data_t);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Binary search.
|
||||||
|
*
|
||||||
|
* Note: these are private until integrated into the heimbase object system.
|
||||||
|
*/
|
||||||
|
typedef struct bsearch_file_handle *bsearch_file_handle;
|
||||||
|
int __bsearch_text(const char *buf, size_t buf_sz, const char *key,
|
||||||
|
char **value, size_t *location, size_t *loops);
|
||||||
|
int __bsearch_file_open(const char *fname, size_t max_sz, size_t page_sz,
|
||||||
|
bsearch_file_handle *bfh, size_t *reads);
|
||||||
|
int __bsearch_file(bsearch_file_handle bfh, const char *key, char **value,
|
||||||
|
size_t *location, size_t *loops, size_t *reads);
|
||||||
|
void __bsearch_file_info(bsearch_file_handle bfh, size_t *page_sz,
|
||||||
|
size_t *max_sz, int *blockwise);
|
||||||
|
void __bsearch_file_close(bsearch_file_handle *bfh);
|
||||||
|
|
||||||
#endif /* HEIM_BASE_H */
|
#endif /* HEIM_BASE_H */
|
||||||
|
@@ -39,6 +39,11 @@ HEIMDAL_BASE_1.0 {
|
|||||||
heim_string_create_with_bytes;
|
heim_string_create_with_bytes;
|
||||||
heim_string_get_type_id;
|
heim_string_get_type_id;
|
||||||
heim_string_get_utf8;
|
heim_string_get_utf8;
|
||||||
|
__bsearch_text;
|
||||||
|
__bsearch_file_open;
|
||||||
|
__bsearch_file;
|
||||||
|
__bsearch_file_info;
|
||||||
|
__bsearch_file_close;
|
||||||
local:
|
local:
|
||||||
*;
|
*;
|
||||||
};
|
};
|
||||||
|
@@ -637,6 +637,7 @@ AC_CONFIG_FILES(Makefile \
|
|||||||
kdc/Makefile \
|
kdc/Makefile \
|
||||||
appl/Makefile \
|
appl/Makefile \
|
||||||
appl/afsutil/Makefile \
|
appl/afsutil/Makefile \
|
||||||
|
appl/dbutils/Makefile \
|
||||||
appl/ftp/Makefile \
|
appl/ftp/Makefile \
|
||||||
appl/ftp/common/Makefile \
|
appl/ftp/common/Makefile \
|
||||||
appl/ftp/ftp/Makefile \
|
appl/ftp/ftp/Makefile \
|
||||||
|
Reference in New Issue
Block a user