1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-26 21:57:41 +03:00

tdb2: add lib/tdb2 (from CCAN init-1161-g661d41f)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2011-06-20 18:40:33 +09:30
parent d925b327f4
commit d24ddb0350
84 changed files with 26504 additions and 0 deletions

165
lib/tdb2/LICENSE Normal file
View File

@ -0,0 +1,165 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

91
lib/tdb2/_info Normal file
View File

@ -0,0 +1,91 @@
#include <string.h>
#include <stdio.h>
/**
* tdb2 - [[WORK IN PROGRESS!]] The trivial (64bit transactional) database
*
* The tdb2 module provides an efficient keyword data mapping (usually
* within a file). It supports transactions, so the contents of the
* database is reliable even across crashes.
*
* Example:
* #include <ccan/tdb2/tdb2.h>
* #include <ccan/str/str.h>
* #include <err.h>
* #include <stdio.h>
*
* static void usage(const char *argv0)
* {
* errx(1, "Usage: %s fetch <dbfile> <key>\n"
* "OR %s store <dbfile> <key> <data>", argv0, argv0);
* }
*
* int main(int argc, char *argv[])
* {
* struct tdb_context *tdb;
* TDB_DATA key, value;
* enum TDB_ERROR error;
*
* if (argc < 4)
* usage(argv[0]);
*
* tdb = tdb_open(argv[2], TDB_DEFAULT, O_CREAT|O_RDWR,0600, NULL);
* if (!tdb)
* err(1, "Opening %s", argv[2]);
*
* key.dptr = (void *)argv[3];
* key.dsize = strlen(argv[3]);
*
* if (streq(argv[1], "fetch")) {
* if (argc != 4)
* usage(argv[0]);
* error = tdb_fetch(tdb, key, &value);
* if (error)
* errx(1, "fetch %s: %s",
* argv[3], tdb_errorstr(error));
* printf("%.*s\n", (int)value.dsize, (char *)value.dptr);
* free(value.dptr);
* } else if (streq(argv[1], "store")) {
* if (argc != 5)
* usage(argv[0]);
* value.dptr = (void *)argv[4];
* value.dsize = strlen(argv[4]);
* error = tdb_store(tdb, key, value, 0);
* if (error)
* errx(1, "store %s: %s",
* argv[3], tdb_errorstr(error));
* } else
* usage(argv[0]);
*
* return 0;
* }
*
* Maintainer: Rusty Russell <rusty@rustcorp.com.au>
*
* Author: Rusty Russell
*
* License: LGPLv3 (or later)
*/
int main(int argc, char *argv[])
{
	/* The only query answered here is "<prog> depends"; any other
	 * invocation (wrong argument count or unknown query) fails. */
	if (argc != 2 || strcmp(argv[1], "depends") != 0)
		return 1;

	/* One dependency per line on stdout. */
	printf("ccan/asprintf\n");
	printf("ccan/hash\n");
	printf("ccan/likely\n");
	printf("ccan/asearch\n");
	printf("ccan/compiler\n");
	printf("ccan/build_assert\n");
	printf("ccan/ilog\n");
	printf("ccan/failtest\n");
	printf("ccan/tally\n");
	printf("ccan/typesafe_cb\n");
	printf("ccan/cast\n");
	printf("ccan/endian\n");
	return 0;
}

835
lib/tdb2/check.c Normal file
View File

@ -0,0 +1,835 @@
/*
Trivial Database 2: database consistency checking
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <ccan/likely/likely.h>
#include <ccan/asearch/asearch.h>
/*
 * We keep an ordered array of offsets.
 *
 * Grow *arr by one element and store off in the new last slot.
 * Returns true on success, with *arr pointing at the (possibly moved)
 * array and *num incremented.  Returns false if realloc() fails, in
 * which case *arr and *num are left untouched.
 */
static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
{
	size_t count = *num;
	tdb_off_t *grown;

	grown = realloc(*arr, (count + 1) * sizeof(tdb_off_t));
	if (grown == NULL)
		return false;

	grown[count] = off;
	*num = count + 1;
	*arr = grown;
	return true;
}
/*
 * Validate the database header stored at offset 0.
 *
 * Checks, in order: the stored hash of TDB_HASH_MAGIC against the one
 * computed with this context's hash function, the magic string, that
 * features_used is a subset of features_offered, and that a non-zero
 * recovery offset lies between the end of the header and the end of the
 * mapped file.
 *
 * On success *features receives hdr.features_offered and *recovery
 * receives hdr.recovery (0 means none), and TDB_SUCCESS is returned.
 * Otherwise the read error, or the value returned by tdb_logerr() for
 * TDB_ERR_CORRUPT, is returned.
 */
static enum TDB_ERROR check_header(struct tdb_context *tdb, tdb_off_t *recovery,
				   uint64_t *features)
{
	uint64_t hash_test;
	struct tdb_header hdr;
	enum TDB_ERROR ecode;

	ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr));
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}
	/* magic food should not be converted, so convert back. */
	tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));

	hash_test = TDB_HASH_MAGIC;
	hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
	if (hdr.hash_test != hash_test) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "check: hash test %llu should be %llu",
				  (unsigned long long)hdr.hash_test,
				  (unsigned long long)hash_test);
	}

	/* A corrupt header need not contain a NUL inside magic_food, so
	 * never compare (or print) beyond the end of the array. */
	if (strncmp(hdr.magic_food, TDB_MAGIC_FOOD,
		    sizeof(hdr.magic_food)) != 0) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "check: bad magic '%.*s'",
				  (int)sizeof(hdr.magic_food),
				  hdr.magic_food);
	}

	/* Features which are used must be a subset of features offered. */
	if (hdr.features_used & ~hdr.features_offered) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "check: features used (0x%llx) which"
				  " are not offered (0x%llx)",
				  (unsigned long long)hdr.features_used,
				  (unsigned long long)hdr.features_offered);
	}

	*features = hdr.features_offered;
	*recovery = hdr.recovery;
	if (*recovery) {
		if (*recovery < sizeof(hdr)
		    || *recovery > tdb->file->map_size) {
			return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					  "tdb_check:"
					  " invalid recovery offset %zu",
					  (size_t)*recovery);
		}
	}

	/* Don't check reserved: they *can* be used later. */
	return TDB_SUCCESS;
}
/* Forward declaration: check_hash_chain() and check_hash_record() below
 * call check_hash_tree(), which in turn calls check_hash_record(). */
static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
tdb_off_t off, unsigned int group_bits,
uint64_t hprefix,
unsigned hprefix_bits,
tdb_off_t used[],
size_t num_used,
size_t *num_found,
enum TDB_ERROR (*check)(TDB_DATA,
TDB_DATA, void *),
void *data);
/*
 * Check a linked list of hash chain records starting at off.
 *
 * Each link must be a TDB_CHAIN_MAGIC record whose data length is
 * sizeof(struct tdb_chain) and whose key length and hash field are 0.
 * Its entries are checked by check_hash_tree() with all 64 hash bits
 * already consumed (hash is the full hash value), then the "next"
 * offset is followed until it is 0.  *num_found is incremented once for
 * every link after the first.
 *
 * Returns TDB_SUCCESS, or the first error encountered.
 */
static enum TDB_ERROR check_hash_chain(struct tdb_context *tdb,
				       tdb_off_t off,
				       uint64_t hash,
				       tdb_off_t used[],
				       size_t num_used,
				       size_t *num_found,
				       enum TDB_ERROR (*check)(TDB_DATA,
							       TDB_DATA,
							       void *),
				       void *data)
{
	for (;;) {
		struct tdb_used_record rec;
		enum TDB_ERROR ecode;
		tdb_off_t body, next;

		ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
		if (ecode != TDB_SUCCESS)
			return ecode;

		if (rec_magic(&rec) != TDB_CHAIN_MAGIC)
			return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					  "tdb_check: Bad hash chain magic %llu",
					  (long long)rec_magic(&rec));

		if (rec_data_length(&rec) != sizeof(struct tdb_chain))
			return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					  "tdb_check:"
					  " Bad hash chain length %llu vs %zu",
					  (long long)rec_data_length(&rec),
					  sizeof(struct tdb_chain));

		if (rec_key_length(&rec) != 0)
			return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					  "tdb_check: Bad hash chain key length %llu",
					  (long long)rec_key_length(&rec));

		if (rec_hash(&rec) != 0)
			return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					  "tdb_check: Bad hash chain hash value %llu",
					  (long long)rec_hash(&rec));

		/* The chain payload starts right after the record header. */
		body = off + sizeof(rec);
		ecode = check_hash_tree(tdb, body, 0, hash, 64,
					used, num_used, num_found, check, data);
		if (ecode != TDB_SUCCESS)
			return ecode;

		next = tdb_read_off(tdb,
				    body + offsetof(struct tdb_chain, next));
		if (TDB_OFF_IS_ERR(next))
			return next;
		if (next == 0)
			return TDB_SUCCESS;

		/* Count the follow-on link, then check it the same way. */
		(*num_found)++;
		off = next;
	}
}
/*
 * Check the sub-level hash record at off.
 *
 * Once hprefix_bits has reached 64 the record is handed to
 * check_hash_chain().  Otherwise it must be a TDB_HTABLE_MAGIC record
 * holding (1 << TDB_SUBLEVEL_HASH_BITS) offsets, with key length and
 * hash field 0; its contents are then checked by check_hash_tree().
 *
 * Returns TDB_SUCCESS, or the first error encountered.
 */
static enum TDB_ERROR check_hash_record(struct tdb_context *tdb,
					tdb_off_t off,
					uint64_t hprefix,
					unsigned hprefix_bits,
					tdb_off_t used[],
					size_t num_used,
					size_t *num_found,
					enum TDB_ERROR (*check)(TDB_DATA,
								TDB_DATA,
								void *),
					void *data)
{
	struct tdb_used_record hdr;
	enum TDB_ERROR err;

	/* No hash bits left to discriminate on: this is a chain. */
	if (hprefix_bits >= 64) {
		return check_hash_chain(tdb, off, hprefix, used, num_used,
					num_found, check, data);
	}

	err = tdb_read_convert(tdb, off, &hdr, sizeof(hdr));
	if (err != TDB_SUCCESS)
		return err;

	if (rec_magic(&hdr) != TDB_HTABLE_MAGIC)
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: Bad hash table magic %llu",
				  (long long)rec_magic(&hdr));

	if (rec_data_length(&hdr)
	    != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check:"
				  " Bad hash table length %llu vs %llu",
				  (long long)rec_data_length(&hdr),
				  (long long)sizeof(tdb_off_t)
				  << TDB_SUBLEVEL_HASH_BITS);

	if (rec_key_length(&hdr) != 0)
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: Bad hash table key length %llu",
				  (long long)rec_key_length(&hdr));

	if (rec_hash(&hdr) != 0)
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: Bad hash table hash value %llu",
				  (long long)rec_hash(&hdr));

	/* The table itself follows the record header. */
	return check_hash_tree(tdb, off + sizeof(hdr),
			       TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
			       hprefix, hprefix_bits,
			       used, num_used, num_found, check, data);
}
/*
 * Three-way comparison of two offsets, used as the asearch() callback.
 * Returns 1, -1 or 0 when *a is greater than, less than or equal to *b.
 */
static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
{
	/* Compare explicitly: a plain subtraction can overflow an int. */
	if (*a > *b)
		return 1;
	if (*a < *b)
		return -1;
	return 0;
}
/*
 * Extract the next num bits of the 64-bit hash h, consuming bits from
 * the most significant end.
 *
 * *used is the number of bits already consumed; it is advanced by num.
 * The caller must keep *used (after the addition) at or below 64.
 *
 * Returns the extracted bits in the low end of the result; asking for
 * zero bits returns 0.
 */
static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
{
	uint64_t mask;

	*used += num;

	/* Nothing requested.  Returning early also avoids shifting a
	 * 64-bit value by 64 when no bits have been consumed yet. */
	if (num == 0)
		return 0;

	/* Build the mask in 64 bits: prefixes can be wider than 31 bits,
	 * which would overflow a shift of a plain unsigned 1. */
	mask = num >= 64 ? ~(uint64_t)0 : (((uint64_t)1 << num) - 1);

	return (h >> (64 - *used)) & mask;
}
/*
 * Check one hash table (or chain body) located at off.
 *
 * The table consists of (1 << group_bits) groups of
 * (1 << TDB_HASH_GROUP_BITS) offsets each.  hprefix holds the
 * hprefix_bits hash bits that led to this table; hprefix_bits == 64
 * means the entries belong to a hash chain.
 *
 * Every non-zero entry must appear in the sorted used[] array; a found
 * entry is flipped (*p ^= 1) so that it cannot be matched a second
 * time, and *num_found is incremented.  Sub-hash entries are followed
 * via check_hash_record(); ordinary entries have their hash prefix,
 * group, bucket bits and in-group placement verified, and the low 11
 * hash bits compared with the record header.  If check is non-NULL it
 * is called with the key and data of each ordinary record.
 *
 * Returns TDB_SUCCESS, or the first error encountered.
 */
static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
tdb_off_t off, unsigned int group_bits,
uint64_t hprefix,
unsigned hprefix_bits,
tdb_off_t used[],
size_t num_used,
size_t *num_found,
enum TDB_ERROR (*check)(TDB_DATA,
TDB_DATA, void *),
void *data)
{
unsigned int g, b;
const tdb_off_t *hash;
struct tdb_used_record rec;
enum TDB_ERROR ecode;
/* Get access to the whole table; released on every exit path below. */
hash = tdb_access_read(tdb, off,
sizeof(tdb_off_t)
<< (group_bits + TDB_HASH_GROUP_BITS),
true);
if (TDB_PTR_IS_ERR(hash)) {
return TDB_PTR_ERR(hash);
}
for (g = 0; g < (1 << group_bits); g++) {
const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
unsigned int bucket, i, used_bits;
uint64_t h;
tdb_off_t *p;
/* Empty slot. */
if (group[b] == 0)
continue;
/* From here on, off is the record this slot points at. */
off = group[b] & TDB_OFF_MASK;
p = asearch(&off, used, num_used, off_cmp);
if (!p) {
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: Invalid offset"
" %llu in hash",
(long long)off);
goto fail;
}
/* Mark it invalid. */
*p ^= 1;
(*num_found)++;
if (hprefix_bits == 64) {
/* Chained entries are unordered. */
if (is_subhash(group[b])) {
ecode = TDB_ERR_CORRUPT;
tdb_logerr(tdb, ecode,
TDB_LOG_ERROR,
"tdb_check: Invalid chain"
" entry subhash");
goto fail;
}
/* In a chain the full hash must equal the prefix. */
h = hash_record(tdb, off);
if (h != hprefix) {
ecode = TDB_ERR_CORRUPT;
tdb_logerr(tdb, ecode,
TDB_LOG_ERROR,
"check: bad hash chain"
" placement"
" 0x%llx vs 0x%llx",
(long long)h,
(long long)hprefix);
goto fail;
}
ecode = tdb_read_convert(tdb, off, &rec,
sizeof(rec));
if (ecode != TDB_SUCCESS) {
goto fail;
}
/* Skip the placement checks; go straight to the user callback. */
goto check;
}
if (is_subhash(group[b])) {
uint64_t subprefix;
/* Extend the prefix with this slot's index within the table. */
subprefix = (hprefix
<< (group_bits + TDB_HASH_GROUP_BITS))
+ g * (1 << TDB_HASH_GROUP_BITS) + b;
ecode = check_hash_record(tdb,
group[b] & TDB_OFF_MASK,
subprefix,
hprefix_bits
+ group_bits
+ TDB_HASH_GROUP_BITS,
used, num_used, num_found,
check, data);
if (ecode != TDB_SUCCESS) {
goto fail;
}
continue;
}
/* A normal entry */
/* Does it belong here at all? */
h = hash_record(tdb, off);
used_bits = 0;
/* NOTE(review): get_bits() is evaluated before the `&& hprefix_bits`
 * guard, so it is also called with num == 0 at the top level. */
if (get_bits(h, hprefix_bits, &used_bits) != hprefix
&& hprefix_bits) {
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"check: bad hash placement"
" 0x%llx vs 0x%llx",
(long long)h,
(long long)hprefix);
goto fail;
}
/* Does it belong in this group? */
if (get_bits(h, group_bits, &used_bits) != g) {
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"check: bad group %llu"
" vs %u",
(long long)h, g);
goto fail;
}
/* Are bucket bits correct? */
bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
!= bucket) {
/* Rewind so the same bits can be re-extracted for the message. */
used_bits -= TDB_HASH_GROUP_BITS;
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"check: bad bucket %u vs %u",
(unsigned)get_bits(h,
TDB_HASH_GROUP_BITS,
&used_bits),
bucket);
goto fail;
}
/* There must not be any zero entries between
* the bucket it belongs in and this one! */
for (i = bucket;
i != b;
i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
if (group[i] == 0) {
ecode = TDB_ERR_CORRUPT;
tdb_logerr(tdb, ecode,
TDB_LOG_ERROR,
"check: bad group placement"
" %u vs %u",
b, bucket);
goto fail;
}
}
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) {
goto fail;
}
/* Bottom bits must match header. */
if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: Bad hash magic"
" at offset %llu"
" (0x%llx vs 0x%llx)",
(long long)off,
(long long)h,
(long long)rec_hash(&rec));
goto fail;
}
/* Reached by fall-through for normal entries and by goto for chain
 * entries; rec holds the record header in both cases. */
check:
if (check) {
TDB_DATA k, d;
const unsigned char *kptr;
/* Key and data are read as one span: the data starts k.dsize bytes in. */
kptr = tdb_access_read(tdb,
off + sizeof(rec),
rec_key_length(&rec)
+ rec_data_length(&rec),
false);
if (TDB_PTR_IS_ERR(kptr)) {
ecode = TDB_PTR_ERR(kptr);
goto fail;
}
k = tdb_mkdata(kptr, rec_key_length(&rec));
d = tdb_mkdata(kptr + k.dsize,
rec_data_length(&rec));
ecode = check(k, d, data);
tdb_access_release(tdb, kptr);
if (ecode != TDB_SUCCESS) {
goto fail;
}
}
}
}
tdb_access_release(tdb, hash);
return TDB_SUCCESS;
/* Error exit: release the table access and return the saved error. */
fail:
tdb_access_release(tdb, hash);
return ecode;
}
/*
 * Check the whole hash structure, starting from the top-level table
 * embedded in the header.
 *
 * used[]/num_used is the sorted array of used-record offsets; entries
 * found in the hash are flipped by check_hash_tree().  num_ftables is
 * the number of free tables, which are used records that never appear
 * in the hash and so seed the found count.  check (may be NULL) and
 * data are passed through to check_hash_tree() for each ordinary record.
 *
 * The callback type matches check_hash_tree() and tdb_check_(), which
 * both use enum TDB_ERROR (*)(TDB_DATA, TDB_DATA, void *).
 *
 * Returns TDB_SUCCESS if the tree is consistent and accounts for every
 * used record, otherwise the error.
 */
static enum TDB_ERROR check_hash(struct tdb_context *tdb,
				 tdb_off_t used[],
				 size_t num_used, size_t num_ftables,
				 enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA,
							 void *),
				 void *data)
{
	/* Free tables also show up as used. */
	size_t num_found = num_ftables;
	enum TDB_ERROR ecode;

	ecode = check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
				TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
				0, 0, used, num_used, &num_found,
				check, data);
	if (ecode == TDB_SUCCESS) {
		if (num_found != num_used) {
			ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					   "tdb_check: Not all entries"
					   " are in hash");
		}
	}
	return ecode;
}
/*
 * Check a single free record.
 *
 * off is the record's offset and frec its already-read contents.  The
 * record must carry TDB_FREE_MAGIC, name free table number ftable, pass
 * the oob() check on the offset just past its end, and have a length
 * that maps to bucket.  If prev is non-zero, the record's prev pointer
 * must equal it.
 *
 * Returns TDB_SUCCESS, the oob() error, or the value returned by
 * tdb_logerr() for TDB_ERR_CORRUPT.
 */
static enum TDB_ERROR check_free(struct tdb_context *tdb,
				 tdb_off_t off,
				 const struct tdb_free_record *frec,
				 tdb_off_t prev, unsigned int ftable,
				 unsigned int bucket)
{
	enum TDB_ERROR ecode;

	if (frec_magic(frec) != TDB_FREE_MAGIC) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: offset %llu bad magic 0x%llx",
				  (long long)off,
				  (long long)frec->magic_and_prev);
	}
	if (frec_ftable(frec) != ftable) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: offset %llu bad freetable %u",
				  (long long)off, frec_ftable(frec));
	}

	ecode = tdb->methods->oob(tdb, off
				  + frec_len(frec)
				  + sizeof(struct tdb_used_record),
				  false);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}
	if (size_to_bucket(frec_len(frec)) != bucket) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: offset %llu in wrong bucket"
				  " (%u vs %u)",
				  (long long)off,
				  bucket, size_to_bucket(frec_len(frec)));
	}
	if (prev && prev != frec_prev(frec)) {
		/* Report the two values that were actually compared. */
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: offset %llu bad prev"
				  " (%llu vs %llu)",
				  (long long)off,
				  (long long)prev,
				  (long long)frec_prev(frec));
	}
	return TDB_SUCCESS;
}
/*
 * Check one free table and every free list hanging off its buckets.
 *
 * ftable_off is the table's offset and ftable_num its index.  fr[] is
 * the sorted array of num_free free-record offsets; each record reached
 * through a list must be present there, is then flipped (*p ^= 1) so it
 * cannot be matched twice, and bumps *num_found.
 *
 * Returns TDB_SUCCESS, or the first error encountered.
 */
static enum TDB_ERROR check_free_table(struct tdb_context *tdb,
				       tdb_off_t ftable_off,
				       unsigned ftable_num,
				       tdb_off_t fr[],
				       size_t num_free,
				       size_t *num_found)
{
	struct tdb_freetable table;
	unsigned int bkt;
	enum TDB_ERROR err;

	err = tdb_read_convert(tdb, ftable_off, &table, sizeof(table));
	if (err != TDB_SUCCESS)
		return err;

	if (rec_magic(&table.hdr) != TDB_FTABLE_MAGIC
	    || rec_key_length(&table.hdr) != 0
	    || rec_data_length(&table.hdr) != sizeof(table) - sizeof(table.hdr)
	    || rec_hash(&table.hdr) != 0) {
		return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				  "tdb_check: Invalid header on free table");
	}

	for (bkt = 0; bkt < TDB_FREE_BUCKETS; bkt++) {
		tdb_off_t cur, last = 0, head = 0;
		struct tdb_free_record frec;

		cur = tdb_read_off(tdb, bucket_off(ftable_off, bkt));
		while (cur != 0) {
			tdb_off_t *slot;

			if (TDB_OFF_IS_ERR(cur))
				return cur;

			/* The list head is masked with TDB_OFF_MASK before
			 * use; later links are used as read. */
			if (!head) {
				cur &= TDB_OFF_MASK;
				head = cur;
			}

			err = tdb_read_convert(tdb, cur, &frec, sizeof(frec));
			if (err != TDB_SUCCESS)
				return err;

			err = check_free(tdb, cur, &frec, last, ftable_num,
					 bkt);
			if (err != TDB_SUCCESS)
				return err;

			/* FIXME: Check hash bits */
			slot = asearch(&cur, fr, num_free, off_cmp);
			if (!slot) {
				return tdb_logerr(tdb, TDB_ERR_CORRUPT,
						  TDB_LOG_ERROR,
						  "tdb_check: Invalid offset"
						  " %llu in free table",
						  (long long)cur);
			}
			/* Mark it invalid. */
			*slot ^= 1;
			(*num_found)++;

			last = cur;
			cur = frec.next;
		}

		if (head) {
			/* Now we can check first back pointer: it is
			 * compared against the last record walked. */
			err = tdb_read_convert(tdb, head, &frec, sizeof(frec));
			if (err != TDB_SUCCESS)
				return err;

			err = check_free(tdb, head, &frec, last, ftable_num,
					 bkt);
			if (err != TDB_SUCCESS)
				return err;
		}
	}
	return TDB_SUCCESS;
}
/*
 * Measure the run of dead bytes starting at off.
 *
 * A byte counts as dead if it is 0 or 0x43; the run ends at the first
 * other byte or at the end of the mapped file.
 *
 * Returns the length of the run, or the tread() error code (callers
 * test the result with TDB_OFF_IS_ERR()).
 *
 * Slow, but should be very rare.
 */
tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off)
{
	size_t len;
	enum TDB_ERROR ecode;

	for (len = 0; off + len < tdb->file->map_size; len++) {
		char c;

		/* Examine each successive byte, not just the first one. */
		ecode = tdb->methods->tread(tdb, off + len, &c, 1);
		if (ecode != TDB_SUCCESS) {
			return ecode;
		}
		if (c != 0 && c != 0x43)
			break;
	}
	return len;
}
/*
 * Walk the file record by record, from just after the header to
 * map_size, classifying each record by its magic.
 *
 * Offsets of used records (used, chain, hash table and free table
 * magics) are appended to *used / *num_used; offsets of free records
 * that belong to a free table are appended to *fr / *num_free.
 * Recovery records and dead space are validated and stepped over.
 * features is the header's features_offered value and recovery the
 * header's recovery offset (0 if none).
 *
 * Returns TDB_SUCCESS, or the first error encountered.  The arrays are
 * not freed here on error.
 */
static enum TDB_ERROR check_linear(struct tdb_context *tdb,
tdb_off_t **used, size_t *num_used,
tdb_off_t **fr, size_t *num_free,
uint64_t features, tdb_off_t recovery)
{
tdb_off_t off;
tdb_len_t len;
enum TDB_ERROR ecode;
bool found_recovery = false;
/* Every branch below either returns or sets len, the total size of the
 * record at off, which advances the scan. */
for (off = sizeof(struct tdb_header);
off < tdb->file->map_size;
off += len) {
union {
struct tdb_used_record u;
struct tdb_free_record f;
struct tdb_recovery_record r;
} rec;
/* r is larger: only get that if we need to. */
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f));
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* If we crash after ftruncate, we can get zeroes or fill. */
if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
|| rec.r.magic == 0x4343434343434343ULL) {
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
if (ecode != TDB_SUCCESS) {
return ecode;
}
if (recovery == off) {
/* An invalidated recovery area at the offset the header names. */
found_recovery = true;
len = sizeof(rec.r) + rec.r.max_len;
} else {
/* Otherwise it must be a run of dead bytes at least as long as a
 * recovery header; it is logged as a warning, not an error. */
len = dead_space(tdb, off);
if (TDB_OFF_IS_ERR(len)) {
return len;
}
if (len < sizeof(rec.r)) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: invalid"
" dead space at %zu",
(size_t)off);
}
tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
"Dead space at %zu-%zu (of %zu)",
(size_t)off, (size_t)(off + len),
(size_t)tdb->file->map_size);
}
} else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
/* A live recovery record: only valid at the header's recovery offset. */
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
if (ecode != TDB_SUCCESS) {
return ecode;
}
if (recovery != off) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: unexpected"
" recovery record at offset"
" %zu",
(size_t)off);
}
if (rec.r.len > rec.r.max_len) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: invalid recovery"
" length %zu",
(size_t)rec.r.len);
}
if (rec.r.eof > tdb->file->map_size) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: invalid old EOF"
" %zu", (size_t)rec.r.eof);
}
found_recovery = true;
len = sizeof(rec.r) + rec.r.max_len;
} else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
/* Free record: the length is computed with the used-record header size. */
len = sizeof(rec.u) + frec_len(&rec.f);
if (off + len > tdb->file->map_size) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: free overlength"
" %llu at offset %llu",
(long long)len,
(long long)off);
}
/* This record should be in free lists. */
if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
&& !append(fr, num_free, off)) {
return tdb_logerr(tdb, TDB_ERR_OOM,
TDB_LOG_ERROR,
"tdb_check: tracking %zu'th"
" free record.", *num_free);
}
} else if (rec_magic(&rec.u) == TDB_USED_MAGIC
|| rec_magic(&rec.u) == TDB_CHAIN_MAGIC
|| rec_magic(&rec.u) == TDB_HTABLE_MAGIC
|| rec_magic(&rec.u) == TDB_FTABLE_MAGIC) {
uint64_t klen, dlen, extra;
/* This record is used! */
if (!append(used, num_used, off)) {
return tdb_logerr(tdb, TDB_ERR_OOM,
TDB_LOG_ERROR,
"tdb_check: tracking %zu'th"
" used record.", *num_used);
}
klen = rec_key_length(&rec.u);
dlen = rec_data_length(&rec.u);
extra = rec_extra_padding(&rec.u);
/* Total size: header, key, data, then padding. */
len = sizeof(rec.u) + klen + dlen + extra;
if (off + len > tdb->file->map_size) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: used overlength"
" %llu at offset %llu",
(long long)len,
(long long)off);
}
/* A used record may not be smaller than a free record. */
if (len < sizeof(rec.f)) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: too short record"
" %llu at %llu",
(long long)len,
(long long)off);
}
/* Check that records have correct 0 at end (but may
* not in future). */
if (extra && !features) {
const char *p;
char c;
/* Only the first padding byte, right after the data, is examined. */
p = tdb_access_read(tdb, off + sizeof(rec.u)
+ klen + dlen, 1, false);
if (TDB_PTR_IS_ERR(p))
return TDB_PTR_ERR(p);
c = *p;
tdb_access_release(tdb, p);
if (c != '\0') {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check:"
" non-zero extra"
" at %llu",
(long long)off);
}
}
} else {
return tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"tdb_check: Bad magic 0x%llx"
" at offset %zu",
(long long)rec_magic(&rec.u),
(size_t)off);
}
}
/* We must have found recovery area if there was one. */
if (recovery != 0 && !found_recovery) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"tdb_check: expected a recovery area at %zu",
(size_t)recovery);
}
return TDB_SUCCESS;
}
/*
 * Check the consistency of an open database.
 *
 * Takes a read allrecord lock and a read expand lock, then checks the
 * header, scans all records linearly, checks every free table and
 * finally the hash tree.  check (may be NULL) is called with the key,
 * the data and the caller's data pointer for each ordinary record
 * reached through the hash.
 *
 * The result is stored in tdb->last_error and returned: TDB_SUCCESS if
 * everything is consistent, otherwise the first error encountered.
 */
enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
			  enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *),
			  void *data)
{
	tdb_off_t *free_offs = NULL, *used_offs = NULL;
	tdb_off_t table, recovery;
	size_t num_free = 0, num_used = 0;
	size_t free_found = 0, num_ftables = 0;
	uint64_t features;
	enum TDB_ERROR ecode;

	ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
	if (ecode != TDB_SUCCESS)
		return tdb->last_error = ecode;

	ecode = tdb_lock_expand(tdb, F_RDLCK);
	if (ecode != TDB_SUCCESS) {
		tdb_allrecord_unlock(tdb, F_RDLCK);
		return tdb->last_error = ecode;
	}

	ecode = check_header(tdb, &recovery, &features);
	if (ecode != TDB_SUCCESS)
		goto unlock;

	/* First we do a linear scan, checking all records. */
	ecode = check_linear(tdb, &used_offs, &num_used,
			     &free_offs, &num_free, features, recovery);
	if (ecode != TDB_SUCCESS)
		goto unlock;

	/* Then each free table in turn; num_ftables doubles as the index
	 * of the table being checked. */
	table = first_ftable(tdb);
	while (table != 0) {
		if (TDB_OFF_IS_ERR(table)) {
			ecode = table;
			goto unlock;
		}
		ecode = check_free_table(tdb, table, num_ftables,
					 free_offs, num_free, &free_found);
		if (ecode != TDB_SUCCESS)
			goto unlock;
		num_ftables++;
		table = next_ftable(tdb, table);
	}

	/* FIXME: Check key uniqueness? */
	ecode = check_hash(tdb, used_offs, num_used, num_ftables,
			   check, data);
	if (ecode != TDB_SUCCESS)
		goto unlock;

	/* Every free record seen by the linear scan must have been
	 * reached through some free list. */
	if (free_found != num_free) {
		ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
				   "tdb_check: Not all entries are in"
				   " free table");
	}

unlock:
	tdb_allrecord_unlock(tdb, F_RDLCK);
	tdb_unlock_expand(tdb, F_RDLCK);
	free(free_offs);
	free(used_offs);
	return tdb->last_error = ecode;
}

View File

@ -0,0 +1,44 @@
Interface differences between TDB1 and TDB2.
- tdb2 uses 'struct tdb_data', tdb1 uses 'struct TDB_DATA'. Use the
TDB_DATA typedef if you want portability between the two.
- tdb2 functions return 0 on success, and a negative error on failure,
whereas tdb1 functions returned 0 on success, and -1 on failure.
tdb1 then used tdb_error() to determine the error; this is also
supported in tdb2 to ease backwards compatibility, though the other
form is preferred.
- tdb2's tdb_fetch() returns an error, tdb1's returned the data directly
(or tdb_null, and you were supposed to check tdb_error() to find out why).
- tdb2's tdb_nextkey() frees the old key's dptr; in tdb1 you needed to do
this manually.
- tdb1's tdb_open/tdb_open_ex took an explicit hash size. tdb2's hash table
resizes as required.
- tdb2 uses a linked list of attribute structures to implement logging and
alternate hashes. tdb1 used tdb_open_ex, which was not extensible.
- tdb2 does locking on read-only databases (ie. O_RDONLY passed to tdb_open).
tdb1 did not: use the TDB_NOLOCK flag if you want to suppress locking.
- tdb2's log function is simpler than tdb1's log function. The string is
already formatted, and it takes an enum tdb_log_level not a tdb_debug_level,
and which has only three values: TDB_LOG_ERROR, TDB_LOG_USE_ERROR and
TDB_LOG_WARNING.
- tdb2 provides tdb_deq() for comparing two struct tdb_data.
- tdb2's tdb_name() returns a copy of the name even for TDB_INTERNAL dbs.
- tdb2 does not need tdb_reopen() or tdb_reopen_all(). If you call
fork() during certain operations, the child should either close the
tdb or complete the operation before continuing to use the tdb:
tdb_transaction_start(): child must tdb_transaction_cancel()
tdb_lockall(): child must call tdb_unlockall()
tdb_lockall_read(): child must call tdb_unlockall_read()
tdb_chainlock(): child must call tdb_chainunlock()
tdb_parse() callback: child must return from tdb_parse()

1049
lib/tdb2/doc/design-1.3.txt Normal file

File diff suppressed because it is too large Load Diff

2689
lib/tdb2/doc/design.lyx Normal file

File diff suppressed because it is too large Load Diff

4679
lib/tdb2/doc/design.lyx,v Normal file

File diff suppressed because it is too large Load Diff

BIN
lib/tdb2/doc/design.pdf Normal file

Binary file not shown.

1258
lib/tdb2/doc/design.txt Normal file

File diff suppressed because it is too large Load Diff

968
lib/tdb2/free.c Normal file
View File

@ -0,0 +1,968 @@
/*
Trivial Database 2: free list/block handling
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <ccan/likely/likely.h>
#include <ccan/ilog/ilog.h>
#include <time.h>
#include <assert.h>
#include <limits.h>
/* Thin wrapper: hands back whatever CCAN's ilog64() yields for val. */
static unsigned fls64(uint64_t val)
{
	unsigned int bits = ilog64(val);

	return bits;
}
/* Map a record's data length (header excluded) to the index of the free
 * bucket that holds records of that size. */
unsigned int size_to_bucket(tdb_len_t data_len)
{
	tdb_len_t excess;
	unsigned int idx;

	/* Records are never smaller than TDB_MIN_DATA_LEN. */
	assert(data_len >= TDB_MIN_DATA_LEN);

	excess = data_len - TDB_MIN_DATA_LEN;
	if (excess <= 64) {
		/* One bucket per 8 bytes over the minimum:
		 * 0 -> bucket 0, 8 -> bucket 1, ... 64 -> bucket 8. */
		idx = excess / 8;
	} else {
		/* Larger sizes are bucketed by power of two. */
		idx = fls64(excess) + 2;
	}

	/* Anything beyond the table shares the last bucket. */
	if (unlikely(idx >= TDB_FREE_BUCKETS))
		return TDB_FREE_BUCKETS - 1;
	return idx;
}
/* Read the free_table field of the database header with tdb_read_off();
 * the value read (or its error value) is returned unchanged. */
tdb_off_t first_ftable(struct tdb_context *tdb)
{
	const tdb_off_t field = offsetof(struct tdb_header, free_table);

	return tdb_read_off(tdb, field);
}
/* Read the `next` field of the free table located at ftable; the value
 * read (or its error value) is returned unchanged. */
tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable)
{
	tdb_off_t link = ftable + offsetof(struct tdb_freetable, next);

	return tdb_read_off(tdb, link);
}
/* Pick one of the database's free tables at random and cache it in
 * tdb->ftable_off (its offset) and tdb->ftable (its index in the chain).
 *
 * Every table in the chain gets one random() draw; the table holding the
 * largest draw so far wins (a later table wins ties).
 *
 * Returns TDB_SUCCESS, or the error value produced while walking the
 * chain. */
enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb)
{
	unsigned int best = 0, index;
	tdb_off_t cur;

	cur = first_ftable(tdb);
	tdb->ftable_off = cur;
	tdb->ftable = 0;

	for (index = 0; cur != 0; index++) {
		unsigned int draw;

		if (TDB_OFF_IS_ERR(cur)) {
			return cur;
		}

		draw = random();
		if (draw >= best) {
			tdb->ftable_off = cur;
			tdb->ftable = index;
			best = draw;
		}
		cur = next_ftable(tdb, cur);
	}
	return TDB_SUCCESS;
}
/* File offset of entry `bucket` in the buckets array of the free table
 * located at ftable_off. */
tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket)
{
	tdb_off_t array_start
		= ftable_off + offsetof(struct tdb_freetable, buckets);

	return array_start + bucket * sizeof(tdb_off_t);
}
/* Ask tdb_find_nonzero_off() for a non-zero bucket head in the table at
 * ftable_off, starting at index `bucket` and bounded by TDB_FREE_BUCKETS.
 * The search is speculative: no lock is taken here.
 *
 * The caller in this file treats a result below TDB_FREE_BUCKETS as a
 * bucket index to try, and otherwise checks it with TDB_OFF_IS_ERR().
 * NOTE(review): the exact "nothing found" value comes from
 * tdb_find_nonzero_off() -- confirm against its definition. */
static tdb_off_t find_free_head(struct tdb_context *tdb,
				tdb_off_t ftable_off,
				tdb_off_t bucket)
{
	tdb_off_t first_bucket = bucket_off(ftable_off, 0);

	return tdb_find_nonzero_off(tdb, first_bucket,
				    bucket, TDB_FREE_BUCKETS);
}
/* Debug-only consistency check of the free list whose head word is at
 * b_off; compiles to nothing unless CCAN_TDB2_DEBUG is defined.
 *
 * Aborts if a record lacks TDB_FREE_MAGIC, if a record's prev does not
 * name the record visited just before it, or if the head's prev does not
 * name the last record visited. */
static void check_list(struct tdb_context *tdb, tdb_off_t b_off)
{
#ifdef CCAN_TDB2_DEBUG
	struct tdb_free_record rec;
	tdb_off_t head, cur, last = 0;

	head = tdb_read_off(tdb, b_off) & TDB_OFF_MASK;

	for (cur = head; cur != 0; cur = rec.next) {
		tdb_read_convert(tdb, cur, &rec, sizeof(rec));
		if (frec_magic(&rec) != TDB_FREE_MAGIC)
			abort();
		if (last != 0 && frec_prev(&rec) != last)
			abort();
		last = cur;
	}

	/* The head's prev must close the loop back to the tail. */
	if (head != 0) {
		tdb_read_convert(tdb, head, &rec, sizeof(rec));
		if (frec_prev(&rec) != last)
			abort();
	}
#endif
}
/* Remove free record r (located at r_off) from the free bucket whose head
 * word lives at b_off.
 *
 * List shape as used by this function: a record whose prev names itself is
 * alone on the list; the record before the head has next == 0 (so the
 * head's prev names the tail); the bucket word keeps extra bits above
 * TDB_OFF_MASK.
 *
 * Returns TDB_SUCCESS, the error value of a failed read or write, or the
 * result of logging TDB_ERR_CORRUPT when r claims to be head but the
 * bucket word names a different record. */
static enum TDB_ERROR remove_from_list(struct tdb_context *tdb,
tdb_off_t b_off, tdb_off_t r_off,
const struct tdb_free_record *r)
{
tdb_off_t off, prev_next, head;
enum TDB_ERROR ecode;
/* Is this only element in list? Zero out bucket, and we're done.
 * (This writes a plain 0, so the bits above TDB_OFF_MASK are cleared
 * as well.) */
if (frec_prev(r) == r_off)
return tdb_write_off(tdb, b_off, 0);
/* off = &r->prev->next */
off = frec_prev(r) + offsetof(struct tdb_free_record, next);
/* Get prev->next */
prev_next = tdb_read_off(tdb, off);
if (TDB_OFF_IS_ERR(prev_next))
return prev_next;
/* If prev->next == 0, we were head: update bucket to point to next. */
if (prev_next == 0) {
/* We must preserve upper bits. */
head = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(head))
return head;
/* Sanity check: the bucket must really name us as its head. */
if ((head & TDB_OFF_MASK) != r_off) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"remove_from_list:"
" %llu head %llu on list %llu",
(long long)r_off,
(long long)head,
(long long)b_off);
}
head = ((head & ~TDB_OFF_MASK) | r->next);
ecode = tdb_write_off(tdb, b_off, head);
if (ecode != TDB_SUCCESS)
return ecode;
} else {
/* r->prev->next = r->next */
ecode = tdb_write_off(tdb, off, r->next);
if (ecode != TDB_SUCCESS)
return ecode;
}
/* If we were the tail, off = &head->prev.
 * The bucket word is re-read because it may just have been rewritten
 * above. */
if (r->next == 0) {
head = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(head))
return head;
head &= TDB_OFF_MASK;
off = head + offsetof(struct tdb_free_record, magic_and_prev);
} else {
/* off = &r->next->prev */
off = r->next + offsetof(struct tdb_free_record,
magic_and_prev);
}
#ifdef CCAN_TDB2_DEBUG
/* *off == r */
if ((tdb_read_off(tdb, off) & TDB_OFF_MASK) != r_off) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"remove_from_list:"
" %llu bad prev in list %llu",
(long long)r_off, (long long)b_off);
}
#endif
/* r->next->prev = r->prev (r's whole magic_and_prev word is copied). */
return tdb_write_off(tdb, off, r->magic_and_prev);
}
/* Enqueue in this free bucket: sets coalesce if we've added 128
 * entries to it.
 *
 * Pushes the record at `off` (data length `len`) onto the front of the
 * list whose head word lives at b_off, and writes the record itself.
 *
 * *coalesce is in/out: when true on entry, the counter kept in the bits
 * above TDB_OFF_MASK of the bucket word is incremented, and on return
 * *coalesce is true only if that counter wrapped to zero.  When false on
 * entry it is left false and the counter is not incremented.
 *
 * Returns TDB_SUCCESS, the error value of a failed read or write, or the
 * result of logging TDB_ERR_CORRUPT. */
static enum TDB_ERROR enqueue_in_free(struct tdb_context *tdb,
tdb_off_t b_off,
tdb_off_t off,
tdb_len_t len,
bool *coalesce)
{
struct tdb_free_record new;
enum TDB_ERROR ecode;
tdb_off_t prev, head;
uint64_t magic = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL));
head = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(head))
return head;
/* ftable_and_len is fixed here; next and magic_and_prev are filled in
 * below. */
new.ftable_and_len = ((uint64_t)tdb->ftable << (64 - TDB_OFF_UPPER_STEAL))
| len;
/* new->next = head. */
new.next = (head & TDB_OFF_MASK);
/* First element? Prev points to ourselves. */
if (!new.next) {
new.magic_and_prev = (magic | off);
} else {
/* new->prev = next->prev */
/* NOTE(review): this read is not passed through TDB_OFF_IS_ERR();
 * a bad value is only caught by the magic check below and is then
 * reported as corruption.  Whether TDB_OFF_IS_ERR() could be applied
 * to a valid magic_and_prev word is not visible here -- confirm
 * before adding such a check. */
prev = tdb_read_off(tdb,
new.next + offsetof(struct tdb_free_record,
magic_and_prev));
new.magic_and_prev = prev;
if (frec_magic(&new) != TDB_FREE_MAGIC) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"enqueue_in_free: %llu bad head"
" prev %llu",
(long long)new.next,
(long long)prev);
}
/* next->prev = new. */
ecode = tdb_write_off(tdb, new.next
+ offsetof(struct tdb_free_record,
magic_and_prev),
off | magic);
if (ecode != TDB_SUCCESS) {
return ecode;
}
#ifdef CCAN_TDB2_DEBUG
/* The record we inherited as prev is the tail, so its next must be 0. */
prev = tdb_read_off(tdb, frec_prev(&new)
+ offsetof(struct tdb_free_record, next));
if (prev != 0) {
return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"enqueue_in_free:"
" %llu bad tail next ptr %llu",
(long long)frec_prev(&new)
+ offsetof(struct tdb_free_record,
next),
(long long)prev);
}
#endif
}
/* Update enqueue count, but don't set high bit: see TDB_OFF_IS_ERR */
if (*coalesce)
head += (1ULL << (64 - TDB_OFF_UPPER_STEAL));
/* Drop the old head offset and bit 63, keep the remaining counter bits,
 * then install the new record as head. */
head &= ~(TDB_OFF_MASK | (1ULL << 63));
head |= off;
ecode = tdb_write_off(tdb, b_off, head);
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* It's time to coalesce if counter wrapped. */
if (*coalesce)
*coalesce = ((head & ~TDB_OFF_MASK) == 0);
/* Finally write the new record's own header. */
return tdb_write_convert(tdb, off, &new, sizeof(new));
}
/* Translate a free-table index into that table's file offset.
 *
 * The currently cached table is answered from tdb->ftable_off; any other
 * index is resolved by following the chain from the first table.  If a
 * read fails part-way, the walk stops and that error value is returned. */
static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable)
{
	tdb_off_t cur;
	unsigned int remaining;

	/* Fast path: the caller wants the table we already have cached. */
	if (likely(tdb->ftable == ftable))
		return tdb->ftable_off;

	cur = first_ftable(tdb);
	remaining = ftable;
	while (remaining > 0 && !TDB_OFF_IS_ERR(cur)) {
		cur = next_ftable(tdb, cur);
		remaining--;
	}
	return cur;
}
/* Try to merge the free record at `off` (data length data_len, sitting on
 * the already-locked bucket at b_off) with the free records that directly
 * follow it in the file.
 *
 * *protect is an offset the caller still intends to visit.  If merging
 * swallows or re-queues that record, or the bucket lock has to be dropped,
 * *protect is overwritten with an error value and b_off is unlocked before
 * returning.
 *
 * Returns:
 *   0        nothing adjacent could be merged; b_off is still locked.
 *   length   usable data length of the enlarged record (+ve).
 *   error    (-ve); b_off has been unlocked.
 *
 * Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and
 * need to blatt the *protect record (which is set to an error). */
static tdb_len_t coalesce(struct tdb_context *tdb,
tdb_off_t off, tdb_off_t b_off,
tdb_len_t data_len,
tdb_off_t *protect)
{
tdb_off_t end;
struct tdb_free_record rec;
enum TDB_ERROR ecode;
tdb->stats.alloc_coalesce_tried++;
/* `end` always points just past the region gathered so far. */
end = off + sizeof(struct tdb_used_record) + data_len;
while (end < tdb->file->map_size) {
const struct tdb_free_record *r;
tdb_off_t nb_off;
unsigned ftable, bucket;
/* Unlocked peek at the neighbour, only to find which bucket to lock. */
r = tdb_access_read(tdb, end, sizeof(*r), true);
if (TDB_PTR_IS_ERR(r)) {
ecode = TDB_PTR_ERR(r);
goto err;
}
/* Stop at anything that is not a free record, or that is marked
 * TDB_FTABLE_NONE (see the fallback path below, which sets it). */
if (frec_magic(r) != TDB_FREE_MAGIC
|| frec_ftable(r) == TDB_FTABLE_NONE) {
tdb_access_release(tdb, r);
break;
}
ftable = frec_ftable(r);
bucket = size_to_bucket(frec_len(r));
nb_off = ftable_offset(tdb, ftable);
if (TDB_OFF_IS_ERR(nb_off)) {
tdb_access_release(tdb, r);
ecode = nb_off;
goto err;
}
nb_off = bucket_off(nb_off, bucket);
tdb_access_release(tdb, r);
/* We may be violating lock order here, so best effort. */
if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT)
!= TDB_SUCCESS) {
tdb->stats.alloc_coalesce_lockfail++;
break;
}
/* Now we have lock, re-check. */
ecode = tdb_read_convert(tdb, end, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) {
tdb_unlock_free_bucket(tdb, nb_off);
goto err;
}
if (unlikely(frec_magic(&rec) != TDB_FREE_MAGIC)) {
tdb->stats.alloc_coalesce_race++;
tdb_unlock_free_bucket(tdb, nb_off);
break;
}
/* It must still belong to the bucket we actually locked. */
if (unlikely(frec_ftable(&rec) != ftable)
|| unlikely(size_to_bucket(frec_len(&rec)) != bucket)) {
tdb->stats.alloc_coalesce_race++;
tdb_unlock_free_bucket(tdb, nb_off);
break;
}
/* Did we just mess up a record you were hoping to use? */
if (end == *protect) {
tdb->stats.alloc_coalesce_iterate_clash++;
*protect = TDB_ERR_NOEXIST;
}
ecode = remove_from_list(tdb, nb_off, end, &rec);
check_list(tdb, nb_off);
if (ecode != TDB_SUCCESS) {
tdb_unlock_free_bucket(tdb, nb_off);
goto err;
}
/* Absorb the neighbour: header plus data. */
end += sizeof(struct tdb_used_record) + frec_len(&rec);
tdb_unlock_free_bucket(tdb, nb_off);
tdb->stats.alloc_coalesce_num_merged++;
}
/* Didn't find any adjacent free? */
if (end == off + sizeof(struct tdb_used_record) + data_len)
return 0;
/* Before we expand, check this isn't one you wanted protected? */
if (off == *protect) {
*protect = TDB_ERR_EXISTS;
tdb->stats.alloc_coalesce_iterate_clash++;
}
/* OK, expand initial record */
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) {
goto err;
}
if (frec_len(&rec) != data_len) {
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
"coalesce: expected data len %zu not %zu",
(size_t)data_len, (size_t)frec_len(&rec));
goto err;
}
/* Take the original record off its list; it is re-added below with its
 * new, larger length. */
ecode = remove_from_list(tdb, b_off, off, &rec);
check_list(tdb, b_off);
if (ecode != TDB_SUCCESS) {
goto err;
}
/* Try locking violation first. We don't allow coalesce recursion! */
ecode = add_free_record(tdb, off, end - off, TDB_LOCK_NOWAIT, false);
if (ecode != TDB_SUCCESS) {
/* Need to drop lock. Can't rely on anything stable. */
tdb->stats.alloc_coalesce_lockfail++;
*protect = TDB_ERR_CORRUPT;
/* We have to drop this to avoid deadlocks, so make sure record
 * doesn't get coalesced by someone else! */
rec.ftable_and_len = (TDB_FTABLE_NONE
<< (64 - TDB_OFF_UPPER_STEAL))
| (end - off - sizeof(struct tdb_used_record));
ecode = tdb_write_off(tdb,
off + offsetof(struct tdb_free_record,
ftable_and_len),
rec.ftable_and_len);
if (ecode != TDB_SUCCESS) {
goto err;
}
tdb_unlock_free_bucket(tdb, b_off);
/* With b_off released we may now wait for the target bucket. */
ecode = add_free_record(tdb, off, end - off, TDB_LOCK_WAIT,
false);
if (ecode != TDB_SUCCESS) {
/* b_off was already unlocked above, so do not go through err. */
return ecode;
}
} else if (TDB_OFF_IS_ERR(*protect)) {
/* For simplicity, we always drop lock if they can't continue */
tdb_unlock_free_bucket(tdb, b_off);
}
tdb->stats.alloc_coalesce_succeeded++;
/* Return usable length. */
return end - off - sizeof(struct tdb_used_record);
err:
/* To unify error paths, we *always* unlock bucket on error. */
tdb_unlock_free_bucket(tdb, b_off);
return ecode;
}
/* List is locked: we unlock it.
 *
 * Walks the free list whose head word is at b_off and calls coalesce() on
 * up to `limit` records; the budget is extended after each call according
 * to how much was merged.  If the walk stops before the end of the list,
 * the list is rotated so that the first unvisited record becomes the head
 * and the visited ones end up at the tail.
 *
 * Every return path leaves the bucket unlocked, either here or inside
 * coalesce().  ftable_off is not referenced in the body.
 *
 * Returns TDB_SUCCESS or an error value. */
static enum TDB_ERROR coalesce_list(struct tdb_context *tdb,
tdb_off_t ftable_off,
tdb_off_t b_off,
unsigned int limit)
{
enum TDB_ERROR ecode;
tdb_off_t off;
off = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(off)) {
ecode = off;
goto unlock_err;
}
/* A little bit of paranoia: counter should be 0. */
off &= TDB_OFF_MASK;
while (off && limit--) {
struct tdb_free_record rec;
tdb_len_t coal;
tdb_off_t next;
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
/* Remember the successor; coalesce() replaces it with an error value
 * if it can no longer be trusted. */
next = rec.next;
coal = coalesce(tdb, off, b_off, frec_len(&rec), &next);
if (TDB_OFF_IS_ERR(coal)) {
/* This has already unlocked on error. */
return coal;
}
if (TDB_OFF_IS_ERR(next)) {
/* Coalescing had to unlock, so stop. */
return TDB_SUCCESS;
}
/* Keep going if we're doing well... */
limit += size_to_bucket(coal / 16 + TDB_MIN_DATA_LEN);
off = next;
}
/* Now, move those elements to the tail of the list so we get something
 * else next time. */
if (off) {
struct tdb_free_record oldhrec, newhrec, oldtrec, newtrec;
tdb_off_t oldhoff, oldtoff, newtoff;
/* The record we were up to is the new head. */
ecode = tdb_read_convert(tdb, off, &newhrec, sizeof(newhrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
/* Get the new tail. */
newtoff = frec_prev(&newhrec);
ecode = tdb_read_convert(tdb, newtoff, &newtrec,
sizeof(newtrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
/* Get the old head. */
/* NOTE(review): oldhoff is used without masking by TDB_OFF_MASK; this
 * relies on the bits above the mask being zero here (see the
 * "paranoia" remark above) -- confirm. */
oldhoff = tdb_read_off(tdb, b_off);
if (TDB_OFF_IS_ERR(oldhoff)) {
ecode = oldhoff;
goto unlock_err;
}
/* This could happen if they all coalesced away. */
if (oldhoff == off)
goto out;
ecode = tdb_read_convert(tdb, oldhoff, &oldhrec,
sizeof(oldhrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
/* Get the old tail. */
oldtoff = frec_prev(&oldhrec);
ecode = tdb_read_convert(tdb, oldtoff, &oldtrec,
sizeof(oldtrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
/* Old tail's next points to old head. */
oldtrec.next = oldhoff;
/* Old head's prev points to old tail. */
oldhrec.magic_and_prev
= (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL))
| oldtoff;
/* New tail's next is 0. */
newtrec.next = 0;
/* Write out the modified versions. */
ecode = tdb_write_convert(tdb, oldtoff, &oldtrec,
sizeof(oldtrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
ecode = tdb_write_convert(tdb, oldhoff, &oldhrec,
sizeof(oldhrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
ecode = tdb_write_convert(tdb, newtoff, &newtrec,
sizeof(newtrec));
if (ecode != TDB_SUCCESS)
goto unlock_err;
/* And finally link in new head. */
ecode = tdb_write_off(tdb, b_off, off);
if (ecode != TDB_SUCCESS)
goto unlock_err;
}
out:
tdb_unlock_free_bucket(tdb, b_off);
return TDB_SUCCESS;
unlock_err:
tdb_unlock_free_bucket(tdb, b_off);
return ecode;
}
/* Put the region [off, off + len_with_header) on the free list of the
 * currently cached free table.
 *
 * waitflag is passed to tdb_lock_free_bucket() for the target bucket.
 * When `coalesce` is true, and the enqueue reports that it is time, the
 * bucket is handed to coalesce_list(), which also releases the lock; in
 * every other case the lock is released here.  The original note on this
 * function says the list must not already be locked when coalescing is
 * requested.
 *
 * Returns TDB_SUCCESS or the error from locking, enqueueing or
 * coalescing. */
enum TDB_ERROR add_free_record(struct tdb_context *tdb,
			       tdb_off_t off, tdb_len_t len_with_header,
			       enum tdb_lock_flags waitflag,
			       bool coalesce)
{
	enum TDB_ERROR ecode;
	tdb_len_t data_len;
	tdb_off_t list;

	assert(len_with_header >= sizeof(struct tdb_free_record));

	data_len = len_with_header - sizeof(struct tdb_used_record);
	list = bucket_off(tdb->ftable_off, size_to_bucket(data_len));

	ecode = tdb_lock_free_bucket(tdb, list, waitflag);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	ecode = enqueue_in_free(tdb, list, off, data_len, &coalesce);
	check_list(tdb, list);

	if (ecode || !coalesce) {
		tdb_unlock_free_bucket(tdb, list);
		return ecode;
	}

	/* coalesce_list() releases the bucket lock itself. */
	return coalesce_list(tdb, tdb->ftable_off, list, 2);
}
/* Space to reserve for a record's key plus data: never less than
 * TDB_MIN_DATA_LEN, and rounded up to a multiple of sizeof(uint64_t). */
static size_t adjust_size(size_t keylen, size_t datalen)
{
	const unsigned long long mask = sizeof(uint64_t) - 1ULL;
	size_t total = keylen + datalen;

	if (total < TDB_MIN_DATA_LEN)
		total = TDB_MIN_DATA_LEN;

	return (total + mask) & ~mask;
}
/* How many bytes of a total_len-byte record can be split off as a separate
 * free record after storing keylen + datalen (with 50% extra data room
 * when want_extra is set)?  Returns 0 unless the remainder is at least
 * sizeof(struct tdb_free_record). */
static size_t record_leftover(size_t keylen, size_t datalen,
			      bool want_extra, size_t total_len)
{
	size_t wanted_data = want_extra ? datalen + datalen / 2 : datalen;
	ssize_t spare = total_len - adjust_size(keylen, wanted_data);

	if (spare >= (ssize_t)sizeof(struct tdb_free_record))
		return spare;
	return 0;
}
/* We need size bytes to put our key and data in.
 *
 * Locks free bucket `bucket` of the table at ftable_off and walks its list
 * for a record of at least adjust_size(keylen, datalen) bytes, preferring
 * the smallest seen so far.  The walk stops early once the best candidate
 * is within `multiplier` times the wanted size; the multiplier starts at
 * 1.0 (1.5 with want_extra) and grows by 1% per record visited.
 *
 * The chosen record is taken off the list and given a used-record header
 * built from magic/keylen/datalen/hashlow.  Any surplus that
 * record_leftover() considers worth splitting is returned to the free
 * list.
 *
 * Returns the offset of the allocated record, 0 if the bucket had nothing
 * suitable, or an error value.  The bucket is unlocked on every path. */
static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
				tdb_off_t ftable_off,
				tdb_off_t bucket,
				size_t keylen, size_t datalen,
				bool want_extra,
				unsigned magic,
				unsigned hashlow)
{
	tdb_off_t off, b_off,best_off;
	struct tdb_free_record best = { 0 };
	double multiplier;
	size_t size = adjust_size(keylen, datalen);
	enum TDB_ERROR ecode;

	tdb->stats.allocs++;
	b_off = bucket_off(ftable_off, bucket);

	/* FIXME: Try non-blocking wait first, to measure contention. */
	/* Lock this bucket. */
	ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	/* Start with an impossibly large "best" so anything real beats it. */
	best.ftable_and_len = -1ULL;
	best_off = 0;

	/* Get slack if we're after extra. */
	if (want_extra)
		multiplier = 1.5;
	else
		multiplier = 1.0;

	/* Walk the list to see if any are large enough, getting less fussy
	 * as we go. */
	off = tdb_read_off(tdb, b_off);
	if (TDB_OFF_IS_ERR(off)) {
		ecode = off;
		goto unlock_err;
	}
	off &= TDB_OFF_MASK;

	while (off) {
		const struct tdb_free_record *r;
		tdb_off_t next;

		r = tdb_access_read(tdb, off, sizeof(*r), true);
		if (TDB_PTR_IS_ERR(r)) {
			ecode = TDB_PTR_ERR(r);
			goto unlock_err;
		}

		if (frec_magic(r) != TDB_FREE_MAGIC) {
			ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					   "lock_and_alloc:"
					   " %llu non-free 0x%llx",
					   (long long)off,
					   (long long)r->magic_and_prev);
			tdb_access_release(tdb, r);
			goto unlock_err;
		}

		/* Big enough, and tighter than the best so far? */
		if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) {
			best_off = off;
			best = *r;
		}

		/* Good enough for the current level of fussiness: stop. */
		if (frec_len(&best) <= size * multiplier && best_off) {
			tdb_access_release(tdb, r);
			break;
		}

		multiplier *= 1.01;

		/* Copy the link out before giving the mapping back. */
		next = r->next;
		tdb_access_release(tdb, r);
		off = next;
	}

	/* If we found anything at all, use it. */
	if (best_off) {
		struct tdb_used_record rec;
		size_t leftover;

		/* We're happy with this size: take it. */
		ecode = remove_from_list(tdb, b_off, best_off, &best);
		check_list(tdb, b_off);
		if (ecode != TDB_SUCCESS) {
			goto unlock_err;
		}

		leftover = record_leftover(keylen, datalen, want_extra,
					   frec_len(&best));

		assert(keylen + datalen + leftover <= frec_len(&best));
		/* We need to mark non-free before we drop lock, otherwise
		 * coalesce() could try to merge it! */
		ecode = set_header(tdb, &rec, magic, keylen, datalen,
				   frec_len(&best) - leftover, hashlow);
		if (ecode != TDB_SUCCESS) {
			goto unlock_err;
		}

		ecode = tdb_write_convert(tdb, best_off, &rec, sizeof(rec));
		if (ecode != TDB_SUCCESS) {
			goto unlock_err;
		}

		/* For futureproofing, we put a 0 in any unused space. */
		if (rec_extra_padding(&rec)) {
			ecode = tdb->methods->twrite(tdb, best_off + sizeof(rec)
						     + keylen + datalen, "", 1);
			if (ecode != TDB_SUCCESS) {
				goto unlock_err;
			}
		}

		/* Bucket of leftover will be <= current bucket, so nested
		 * locking is allowed. */
		if (leftover) {
			tdb->stats.alloc_leftover++;
			ecode = add_free_record(tdb,
						best_off + sizeof(rec)
						+ frec_len(&best) - leftover,
						leftover, TDB_LOCK_WAIT, false);
			if (ecode != TDB_SUCCESS) {
				best_off = ecode;
			}
		}
		tdb_unlock_free_bucket(tdb, b_off);

		return best_off;
	}

	tdb_unlock_free_bucket(tdb, b_off);
	return 0;

unlock_err:
	tdb_unlock_free_bucket(tdb, b_off);
	return ecode;
}
/* Get a free block from current free list, or 0 if none, -ve on error.
 *
 * The search starts in the free table cached in tdb, at the bucket that
 * matches the requested size (computed with 50% extra data room when
 * want_extra is set), and moves on to later non-empty buckets.  When a
 * table yields nothing the next table is tried, wrapping around to the
 * first one, until the search is back at the table it started from.
 *
 * On success the table that satisfied the request becomes the cached one
 * (tdb->ftable_off / tdb->ftable). */
static tdb_off_t get_free(struct tdb_context *tdb,
size_t keylen, size_t datalen, bool want_extra,
unsigned magic, unsigned hashlow)
{
tdb_off_t off, ftable_off;
tdb_off_t start_b, b, ftable;
bool wrapped = false;
/* If they are growing, add 50% to get to higher bucket. */
if (want_extra)
start_b = size_to_bucket(adjust_size(keylen,
datalen + datalen / 2));
else
start_b = size_to_bucket(adjust_size(keylen, datalen));
ftable_off = tdb->ftable_off;
ftable = tdb->ftable;
/* Stop once we have wrapped AND are back at the starting table. */
while (!wrapped || ftable_off != tdb->ftable_off) {
/* Start at exact size bucket, and search up... */
for (b = find_free_head(tdb, ftable_off, start_b);
b < TDB_FREE_BUCKETS;
b = find_free_head(tdb, ftable_off, b + 1)) {
/* Try getting one from list. */
off = lock_and_alloc(tdb, ftable_off,
b, keylen, datalen, want_extra,
magic, hashlow);
if (TDB_OFF_IS_ERR(off))
return off;
if (off != 0) {
if (b == start_b)
tdb->stats.alloc_bucket_exact++;
if (b == TDB_FREE_BUCKETS - 1)
tdb->stats.alloc_bucket_max++;
/* Worked? Stay using this list. */
tdb->ftable_off = ftable_off;
tdb->ftable = ftable;
return off;
}
/* Didn't work. Try next bucket. */
}
/* The bucket scan also ends when find_free_head() reports an error. */
if (TDB_OFF_IS_ERR(b)) {
return b;
}
/* Hmm, try next table. */
ftable_off = next_ftable(tdb, ftable_off);
if (TDB_OFF_IS_ERR(ftable_off)) {
return ftable_off;
}
ftable++;
/* End of the chain: wrap around to the first table. */
if (ftable_off == 0) {
wrapped = true;
ftable_off = first_ftable(tdb);
if (TDB_OFF_IS_ERR(ftable_off)) {
return ftable_off;
}
ftable = 0;
}
}
return 0;
}
/* Fill in a used-record header.
 *
 * magic_and_meta is assembled from: the low 11 bits of hashlow, the
 * padding (actuallen minus key and data lengths) shifted up by 11, the
 * key-length field width shifted up by 43, and magic shifted up by 48.
 * key_and_data_len holds keylen in its low bits with datalen placed above
 * them.
 *
 * The header is then decoded again; if any of the three values does not
 * survive the round trip, the failure is logged and the result of
 * tdb_logerr(TDB_ERR_IO) is returned.  Otherwise returns TDB_SUCCESS. */
enum TDB_ERROR set_header(struct tdb_context *tdb,
			  struct tdb_used_record *rec,
			  unsigned magic, uint64_t keylen, uint64_t datalen,
			  uint64_t actuallen, unsigned hashlow)
{
	const uint64_t padding = actuallen - (keylen + datalen);
	const uint64_t keybits = (fls64(keylen) + 1) / 2;
	uint64_t meta;

	/* Use bottom bits of hash, so it's independent of hash table size. */
	meta = hashlow & ((1 << 11)-1);
	meta |= padding << 11;
	meta |= keybits << 43;
	meta |= (uint64_t)magic << 48;

	rec->magic_and_meta = meta;
	rec->key_and_data_len = keylen | (datalen << (keybits * 2));

	/* Encoding can fail on big values. */
	if (rec_key_length(rec) == keylen
	    && rec_data_length(rec) == datalen
	    && rec_extra_padding(rec) == padding) {
		return TDB_SUCCESS;
	}

	return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
			  "Could not encode k=%llu,d=%llu,a=%llu",
			  (long long)keylen, (long long)datalen,
			  (long long)actuallen);
}
/* Expand the database.
 *
 * `size` is the space the caller is short of; the file is grown by more
 * than that (see the sizing rules below) and the new region is added to
 * the free list as one record.
 *
 * Unless TDB_NOLOCK is set, the caller must hold the allrecord lock or a
 * hash lock.  Returns TDB_SUCCESS (also when another expansion was
 * detected and nothing was done here, so the caller should simply retry
 * its allocation) or an error value. */
static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size)
{
uint64_t old_size, rec_size, map_size;
tdb_len_t wanted;
enum TDB_ERROR ecode;
/* Need to hold a hash lock to expand DB: transactions rely on it. */
if (!(tdb->flags & TDB_NOLOCK)
&& !tdb->file->allrecord_lock.count && !tdb_has_hash_locks(tdb)) {
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_expand: must hold lock during expand");
}
/* Only one person can expand file at a time. */
ecode = tdb_lock_expand(tdb, F_WRLCK);
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* Someone else may have expanded the file, so retry.
 * NOTE(review): presumably the oob() call refreshes
 * tdb->file->map_size as a side effect; its return value is ignored on
 * purpose -- confirm against the methods implementation. */
old_size = tdb->file->map_size;
tdb->methods->oob(tdb, tdb->file->map_size + 1, true);
if (tdb->file->map_size != old_size) {
tdb_unlock_expand(tdb, F_WRLCK);
return TDB_SUCCESS;
}
/* limit size in order to avoid using up huge amounts of memory for
 * in memory tdbs if an oddball huge record creeps in */
if (size > 100 * 1024) {
rec_size = size * 2;
} else {
rec_size = size * 100;
}
/* Grow by at least rec_size bytes, and by at least 25% of the current
 * size when the DB is no larger than 100MiB; beyond that, by at least
 * 10% only. */
if (old_size > 100 * 1024 * 1024) {
map_size = old_size / 10;
} else {
map_size = old_size / 4;
}
/* Take whichever of the two growth amounts is larger. */
if (map_size > rec_size) {
wanted = map_size;
} else {
wanted = rec_size;
}
/* We need room for the record header too. */
wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted);
ecode = tdb->methods->expand_file(tdb, wanted);
if (ecode != TDB_SUCCESS) {
tdb_unlock_expand(tdb, F_WRLCK);
return ecode;
}
/* We need to drop this lock before adding free record. */
tdb_unlock_expand(tdb, F_WRLCK);
tdb->stats.expands++;
/* The new space starts where the file used to end. */
return add_free_record(tdb, old_size, wanted, TDB_LOCK_WAIT, true);
}
/* Allocate a record able to hold keylen + datalen bytes.
 *
 * Running out of free space is not an error here: the database is expanded
 * and the search repeated.  Errors from get_free() or tdb_expand() are
 * still returned to the caller.
 *
 * `growing` asks get_free() for extra room; `magic` and `hash` go into the
 * new record's header.  Returns the record offset or an error value. */
tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
		uint64_t hash, unsigned magic, bool growing)
{
	enum TDB_ERROR ecode;
	tdb_off_t found;

	/* We can't hold pointers during this: we could unmap! */
	assert(!tdb->direct_access);

	do {
		found = get_free(tdb, keylen, datalen, growing, magic, hash);
		if (likely(found != 0))
			return found;

		/* Nothing free: grow the file, then look again. */
		ecode = tdb_expand(tdb, adjust_size(keylen, datalen));
	} while (ecode == TDB_SUCCESS);

	return ecode;
}

881
lib/tdb2/hash.c Normal file
View File

@ -0,0 +1,881 @@
/*
Trivial Database 2: hash handling
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
/* Hash len bytes at ptr with the database's configured hash function,
 * seed and private data. */
uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len)
{
	uint64_t digest = tdb->hash_fn(ptr, len, tdb->hash_seed,
				       tdb->hash_data);

	return digest;
}
/* Compute the hash of the key stored in the used record at off.
 *
 * The key length is taken from the record header and the key bytes are
 * read from directly after it.  If either read fails, 0 is returned, so
 * callers cannot tell a failure from a genuine hash of 0 (see FIXME). */
uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
{
	const struct tdb_used_record *hdr;
	const void *keybuf;
	uint64_t keylen, digest = 0;

	hdr = tdb_access_read(tdb, off, sizeof(*hdr), true);
	if (TDB_PTR_IS_ERR(hdr)) {
		/* FIXME */
		return digest;
	}
	keylen = rec_key_length(hdr);
	tdb_access_release(tdb, hdr);

	keybuf = tdb_access_read(tdb, off + sizeof(*hdr), keylen, false);
	if (!TDB_PTR_IS_ERR(keybuf)) {
		digest = tdb_hash(tdb, keybuf, keylen);
		tdb_access_release(tdb, keybuf);
	}
	return digest;
}
/* Get bits from a value.
 *
 * Returns the `num` bits of val that begin at bit position `start`
 * (bit 0 is the least significant), right-aligned.  num may be 0..32.
 *
 * The mask is built in 64 bits: with a 32-bit 1U, num == 32 (which the
 * assert allows) would shift by the full width of the type, which is
 * undefined behaviour. */
static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
{
	assert(num <= 32);
	return (val >> start) & ((1ULL << num) - 1);
}
/* Consume the next `num` unused bits of the hash in h, working from the
 * most significant end downwards, and return them.  Taking bits from the
 * top lets whole sections of the hash be locked with lock ranges.
 * h->hash_used is advanced by num. */
static uint32_t use_bits(struct hash_info *h, unsigned num)
{
	unsigned start;

	h->hash_used += num;
	start = 64 - h->hash_used;

	return bits_from(h->h, start, num);
}
/* Does the record at off (whose header is *rec) hold exactly this key?
 *
 * Returns true/false, or the error value from reading the stored key.
 * The compare_wrong_keylen / compare_wrong_keycmp statistics are bumped
 * on the corresponding mismatch. */
static tdb_bool_err key_matches(struct tdb_context *tdb,
				const struct tdb_used_record *rec,
				tdb_off_t off,
				const struct tdb_data *key)
{
	const char *stored;
	bool same;

	/* Cheap test first: lengths must agree. */
	if (rec_key_length(rec) != key->dsize) {
		tdb->stats.compare_wrong_keylen++;
		return false;
	}

	/* The key bytes sit directly after the record header. */
	stored = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false);
	if (TDB_PTR_IS_ERR(stored)) {
		return TDB_PTR_ERR(stored);
	}

	same = (memcmp(stored, key->dptr, key->dsize) == 0);
	if (!same)
		tdb->stats.compare_wrong_keycmp++;

	tdb_access_release(tdb, stored);
	return same;
}
/* Does hash-table entry `val` refer to the record holding `key`?
 *
 * Checks run from cheapest to dearest, each with its own statistic:
 * the home bucket encoded in val, the extra hash bits encoded in val,
 * the hash bits stored in the record header, and finally the key itself.
 * *rec is filled from the record once the first two checks have passed.
 *
 * Returns true/false, or an error value from reading the record. */
static tdb_bool_err match(struct tdb_context *tdb,
			  struct hash_info *h,
			  const struct tdb_data *key,
			  tdb_off_t val,
			  struct tdb_used_record *rec)
{
	uint32_t stored_bits, hash_bits;
	enum TDB_ERROR ecode;
	tdb_off_t rec_off;

	tdb->stats.compares++;

	/* Desired bucket must match. */
	if ((val & TDB_OFF_HASH_GROUP_MASK) != h->home_bucket) {
		tdb->stats.compare_wrong_bucket++;
		return false;
	}

	/* Top bits of offset == next bits of hash. */
	stored_bits = bits_from(val, TDB_OFF_HASH_EXTRA_BIT,
				TDB_OFF_UPPER_STEAL_EXTRA);
	hash_bits = bits_from(h->h,
			      64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
			      TDB_OFF_UPPER_STEAL_EXTRA);
	if (stored_bits != hash_bits) {
		tdb->stats.compare_wrong_offsetbits++;
		return false;
	}

	rec_off = val & TDB_OFF_MASK;
	ecode = tdb_read_convert(tdb, rec_off, rec, sizeof(*rec));
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	/* The header carries the low 11 bits of the hash. */
	if (rec_hash(rec) != (h->h & ((1 << 11)-1))) {
		tdb->stats.compare_wrong_rechash++;
		return false;
	}

	return key_matches(tdb, rec, rec_off, key);
}
/* File offset of slot `bucket` (taken modulo the group size) within the
 * hash group that starts at group_start. */
static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket)
{
	unsigned slot = bucket % (1 << TDB_HASH_GROUP_BITS);

	return group_start + slot * sizeof(tdb_off_t);
}
/* True if bit TDB_OFF_UPPER_STEAL_SUBHASH_BIT is set in val; callers in
 * this file treat such an entry as a pointer to a further hash table
 * rather than to a record. */
bool is_subhash(tdb_off_t val)
{
	tdb_off_t flag = (val >> TDB_OFF_UPPER_STEAL_SUBHASH_BIT) & 1;

	return flag != 0;
}
/* FIXME: Guess the depth, don't over-lock!
 *
 * Compute the lock range covering top-level hash group `group`: the range
 * length is stored in *size and its start is returned.  Both use the same
 * shift, 64 minus the number of hash bits that select a top-level
 * group. */
static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size)
{
	const unsigned shift
		= 64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);

	*size = 1ULL << shift;
	return group << shift;
}
/* Search a chain of hash groups for `key`.  `chain` is the offset of the
 * first group (i.e. past its record header); each group is followed by a
 * `next` link (struct tdb_chain) naming the next chain record.
 *
 * h->group_start/h->group are left describing the last group read.  If the
 * key is found, h->home_bucket and h->found_bucket index it and, when tinfo
 * is given, a level describing that group is appended.  Otherwise they
 * index an empty slot seen during the search (the last one noticed), or 0
 * if none was seen.
 *
 * Returns the record offset if found (*rec holds its header), 0 if not
 * found, or an error value.  No locks are taken or released here. */
static tdb_off_t COLD find_in_chain(struct tdb_context *tdb,
struct tdb_data key,
tdb_off_t chain,
struct hash_info *h,
struct tdb_used_record *rec,
struct traverse_info *tinfo)
{
tdb_off_t off, next;
enum TDB_ERROR ecode;
/* In case nothing is free, we set these to zero. */
h->home_bucket = h->found_bucket = 0;
for (off = chain; off; off = next) {
unsigned int i;
h->group_start = off;
ecode = tdb_read_convert(tdb, off, h->group, sizeof(h->group));
if (ecode != TDB_SUCCESS) {
return ecode;
}
for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
tdb_off_t recoff;
if (!h->group[i]) {
/* Remember this empty bucket. */
h->home_bucket = h->found_bucket = i;
continue;
}
/* We can insert extra bits via add_to_hash
 * empty bucket logic. */
recoff = h->group[i] & TDB_OFF_MASK;
ecode = tdb_read_convert(tdb, recoff, rec,
sizeof(*rec));
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* key_matches() yields an error value, false or true; the result is
 * carried in ecode here. */
ecode = key_matches(tdb, rec, recoff, &key);
if (ecode < 0) {
return ecode;
}
if (ecode == 1) {
h->home_bucket = h->found_bucket = i;
if (tinfo) {
tinfo->levels[tinfo->num_levels]
.hashtable = off;
tinfo->levels[tinfo->num_levels]
.total_buckets
= 1 << TDB_HASH_GROUP_BITS;
tinfo->levels[tinfo->num_levels].entry
= i;
tinfo->num_levels++;
}
return recoff;
}
}
next = tdb_read_off(tdb, off
+ offsetof(struct tdb_chain, next));
if (TDB_OFF_IS_ERR(next)) {
return next;
}
/* The link names the chain record; its group starts after the header. */
if (next)
next += sizeof(struct tdb_used_record);
}
return 0;
}
/* This is the core routine which searches the hashtable for an entry.
 * On error, no locks are held and -ve is returned.
 * Otherwise, hinfo is filled in (and the optional tinfo).
 * If not found, the return value is 0.
 * If found, the return value is the offset, and *rec is the record.
 *
 * The hash range covering the key's top-level group is locked with `ltype`
 * and stays locked on both the found and the not-found return.  Every
 * error, including one reported by find_in_chain(), goes through `fail`
 * so that the lock is released as promised above. */
tdb_off_t find_and_lock(struct tdb_context *tdb,
			struct tdb_data key,
			int ltype,
			struct hash_info *h,
			struct tdb_used_record *rec,
			struct traverse_info *tinfo)
{
	uint32_t i, group;
	tdb_off_t hashtable, off;
	enum TDB_ERROR ecode;

	h->h = tdb_hash(tdb, key.dptr, key.dsize);
	h->hash_used = 0;
	group = use_bits(h, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
	h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);

	h->hlock_start = hlock_range(group, &h->hlock_range);
	ecode = tdb_lock_hashes(tdb, h->hlock_start, h->hlock_range, ltype,
				TDB_LOCK_WAIT);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	hashtable = offsetof(struct tdb_header, hashtable);
	if (tinfo) {
		tinfo->toplevel_group = group;
		tinfo->num_levels = 1;
		tinfo->levels[0].entry = 0;
		tinfo->levels[0].hashtable = hashtable
			+ (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
		tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
	}

	while (h->hash_used <= 64) {
		/* Read in the hash group. */
		h->group_start = hashtable
			+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);

		ecode = tdb_read_convert(tdb, h->group_start, &h->group,
					 sizeof(h->group));
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}

		/* Pointer to another hash table?  Go down... */
		if (is_subhash(h->group[h->home_bucket])) {
			hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
				+ sizeof(struct tdb_used_record);
			if (tinfo) {
				/* When we come back, use *next* bucket */
				tinfo->levels[tinfo->num_levels-1].entry
					+= h->home_bucket + 1;
			}
			group = use_bits(h, TDB_SUBLEVEL_HASH_BITS
					 - TDB_HASH_GROUP_BITS);
			h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
			if (tinfo) {
				tinfo->levels[tinfo->num_levels].hashtable
					= hashtable;
				tinfo->levels[tinfo->num_levels].total_buckets
					= 1 << TDB_SUBLEVEL_HASH_BITS;
				tinfo->levels[tinfo->num_levels].entry
					= group << TDB_HASH_GROUP_BITS;
				tinfo->num_levels++;
			}
			continue;
		}

		/* It's in this group: search (until 0 or all searched) */
		for (i = 0, h->found_bucket = h->home_bucket;
		     i < (1 << TDB_HASH_GROUP_BITS);
		     i++, h->found_bucket = ((h->found_bucket+1)
					     % (1 << TDB_HASH_GROUP_BITS))) {
			tdb_bool_err berr;
			if (is_subhash(h->group[h->found_bucket]))
				continue;

			if (!h->group[h->found_bucket])
				break;

			berr = match(tdb, h, &key, h->group[h->found_bucket],
				     rec);
			if (berr < 0) {
				ecode = berr;
				goto fail;
			}
			if (berr) {
				if (tinfo) {
					tinfo->levels[tinfo->num_levels-1].entry
						+= h->found_bucket;
				}
				return h->group[h->found_bucket] & TDB_OFF_MASK;
			}
		}
		/* Didn't find it: h indicates where it would go. */
		return 0;
	}

	/* All hash bits are used up: the entries live in a chain. */
	off = find_in_chain(tdb, key, hashtable, h, rec, tinfo);
	if (TDB_OFF_IS_ERR(off)) {
		/* find_in_chain() does not unlock; do it here. */
		ecode = off;
		goto fail;
	}
	return off;

fail:
	tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
	return ecode;
}
/* I wrote a simple test, expanding a hash to 2GB, for the following
* cases:
* 1) Expanding all the buckets at once,
* 2) Expanding the bucket we wanted to place the new entry into.
* 3) Expanding the most-populated bucket,
*
* I measured the worst/average/best density during this process.
* 1) 3%/16%/30%
* 2) 4%/20%/38%
* 3) 6%/22%/41%
*
* So we figure out the busiest bucket for the moment.
*/
/* Return the home bucket claimed by the most entries of `group`, counting
 * the entry about to be added (new_bucket) as well.  Sub-hash pointers are
 * ignored.  A later bucket must strictly beat the current leader to
 * replace it.  tdb is not referenced in the body. */
static unsigned fullest_bucket(struct tdb_context *tdb,
			       const tdb_off_t *group,
			       unsigned new_bucket)
{
	unsigned tally[1 << TDB_HASH_GROUP_BITS] = { 0 };
	unsigned int slot, leader = new_bucket;

	/* Count the new entry. */
	tally[new_bucket] = 1;

	for (slot = 0; slot < (1 << TDB_HASH_GROUP_BITS); slot++) {
		unsigned home;

		if (!is_subhash(group[slot])) {
			home = group[slot] & TDB_OFF_HASH_GROUP_MASK;
			tally[home]++;
			if (tally[home] > tally[leader])
				leader = home;
		}
	}
	return leader;
}
/* Store `encoded` in the first empty slot of `group`, probing from
 * `bucket` and wrapping around the group.  Returns false if every slot is
 * occupied. */
static bool put_into_group(tdb_off_t *group,
			   unsigned bucket, tdb_off_t encoded)
{
	unsigned int step;

	for (step = 0; step < (1 << TDB_HASH_GROUP_BITS); step++) {
		tdb_off_t *slot
			= &group[(bucket + step) % (1 << TDB_HASH_GROUP_BITS)];

		if (*slot != 0)
			continue;

		*slot = encoded;
		return true;
	}
	return false;
}
/* Like put_into_group(), for callers that know there is room: a full
 * group aborts the program. */
static void force_into_group(tdb_off_t *group,
			     unsigned bucket, tdb_off_t encoded)
{
	bool placed = put_into_group(group, bucket, encoded);

	if (!placed)
		abort();
}
/* Build the value stored in a hash bucket for the record at new_off:
 * the record offset, OR-ed with the home bucket and with
 * TDB_OFF_UPPER_STEAL_EXTRA further hash bits (taken from just below the
 * h->hash_used bits already consumed) shifted up to
 * TDB_OFF_HASH_EXTRA_BIT. */
static tdb_off_t encode_offset(tdb_off_t new_off, struct hash_info *h)
{
	uint64_t extra_hash;

	extra_hash = bits_from(h->h,
			       64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
			       TDB_OFF_UPPER_STEAL_EXTRA);

	return h->home_bucket
		| new_off
		| (extra_hash << TDB_OFF_HASH_EXTRA_BIT);
}
/* Point the bucket located by the previous lookup (h->found_bucket within
 * the group at h->group_start) at new_off, overwriting whatever it held.
 * Returns the result of the underlying tdb_write_off(). */
enum TDB_ERROR replace_in_hash(struct tdb_context *tdb,
			       struct hash_info *h,
			       tdb_off_t new_off)
{
	tdb_off_t bucket_off = hbucket_off(h->group_start, h->found_bucket);
	tdb_off_t encoded = encode_offset(new_off, h);

	return tdb_write_off(tdb, bucket_off, encoded);
}
/* We slot in anywhere that's empty in the chain.
 *
 * subhash is the offset of the chain's *contents* (the bucket slots), i.e.
 * just past its struct tdb_used_record header.  If all
 * (1 << TDB_HASH_GROUP_BITS) slots are taken we follow the chain's `next'
 * link, allocating, zeroing and linking a fresh chain record first if
 * there is none, and retry there.
 *
 * Returns TDB_SUCCESS, or the error from the failing read/alloc/write.
 */
static enum TDB_ERROR COLD add_to_chain(struct tdb_context *tdb,
					tdb_off_t subhash,
					tdb_off_t new_off)
{
	tdb_off_t entry;
	enum TDB_ERROR ecode;

	entry = tdb_find_zero_off(tdb, subhash, 1<<TDB_HASH_GROUP_BITS);
	if (TDB_OFF_IS_ERR(entry)) {
		return entry;
	}

	/* tdb_find_zero_off() returns the slot count when nothing is free. */
	if (entry == 1 << TDB_HASH_GROUP_BITS) {
		tdb_off_t next;

		next = tdb_read_off(tdb, subhash
				    + offsetof(struct tdb_chain, next));
		if (TDB_OFF_IS_ERR(next)) {
			return next;
		}

		if (!next) {
			next = alloc(tdb, 0, sizeof(struct tdb_chain), 0,
				     TDB_CHAIN_MAGIC, false);
			if (TDB_OFF_IS_ERR(next))
				return next;
			ecode = zero_out(tdb,
					 next+sizeof(struct tdb_used_record),
					 sizeof(struct tdb_chain));
			if (ecode != TDB_SUCCESS) {
				return ecode;
			}
			ecode = tdb_write_off(tdb, subhash
					      + offsetof(struct tdb_chain,
							 next),
					      next);
			if (ecode != TDB_SUCCESS) {
				return ecode;
			}
		}
		/* `next' is the offset of the chain *record*; its slots
		 * start after the used-record header (that is where we
		 * zeroed above, and where traversal looks for entries).
		 * Recursing on the bare record offset would scan and
		 * overwrite the header region instead. */
		return add_to_chain(tdb,
				    next + sizeof(struct tdb_used_record),
				    new_off);
	}

	return tdb_write_off(tdb, subhash + entry * sizeof(tdb_off_t),
			     new_off);
}
/* Insert an existing bucket value into a freshly created subhash.
 *
 * val is the encoded bucket value being moved down; only its record
 * offset (val & TDB_OFF_MASK) is kept, and the record is re-hashed to
 * choose the group and home bucket within the subhash.  If there are not
 * enough hash bits left for another level, the subhash is really a chain
 * and the record is appended with add_to_chain(). */
static enum TDB_ERROR add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
				     unsigned hash_used, tdb_off_t val)
{
	const tdb_off_t rec_off = (val & TDB_OFF_MASK);
	struct hash_info hinfo;
	tdb_off_t *slots;
	unsigned int group_idx;

	hinfo.hash_used = hash_used;

	if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64) {
		return add_to_chain(tdb, subhash, rec_off);
	}

	hinfo.h = hash_record(tdb, rec_off);
	group_idx = use_bits(&hinfo,
			     TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS);
	hinfo.group_start = subhash
		+ group_idx * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
	hinfo.home_bucket = use_bits(&hinfo, TDB_HASH_GROUP_BITS);

	slots = tdb_access_write(tdb, hinfo.group_start,
				 sizeof(*slots) << TDB_HASH_GROUP_BITS, true);
	if (TDB_PTR_IS_ERR(slots)) {
		return TDB_PTR_ERR(slots);
	}

	/* A brand new subhash group cannot be full. */
	force_into_group(slots, hinfo.home_bucket,
			 encode_offset(rec_off, &hinfo));
	return tdb_access_commit(tdb, slots);
}
/* Make room in a full hash group by pushing one bucket down a level.
 *
 * Picks the fullest home bucket (fullest_bucket(), counting the pending
 * entry for h->home_bucket), allocates and zeroes a new subhash for it
 * (a chain instead once all 64 hash bits are used), replaces that bucket
 * with a pointer to the new subhash, and redistributes the entries that
 * were taken out of the group.
 *
 * Only the in-memory copy h->group is modified here; nothing in this
 * function writes it back to the file.
 *
 * Returns TDB_SUCCESS, or the error from alloc(), zero_out() or
 * add_to_subhash().  NOTE(review): no code here releases the new subhash
 * on those error paths.
 */
static enum TDB_ERROR expand_group(struct tdb_context *tdb, struct hash_info *h)
{
unsigned bucket, num_vals, i, magic;
size_t subsize;
tdb_off_t subhash;
tdb_off_t vals[1 << TDB_HASH_GROUP_BITS];
enum TDB_ERROR ecode;
/* Attach new empty subhash under fullest bucket. */
bucket = fullest_bucket(tdb, h->group, h->home_bucket);
/* No hash bits left to index another level: use a chain record. */
if (h->hash_used == 64) {
tdb->stats.alloc_chain++;
subsize = sizeof(struct tdb_chain);
magic = TDB_CHAIN_MAGIC;
} else {
tdb->stats.alloc_subhash++;
subsize = (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS);
magic = TDB_HTABLE_MAGIC;
}
subhash = alloc(tdb, 0, subsize, 0, magic, false);
if (TDB_OFF_IS_ERR(subhash)) {
return subhash;
}
/* Clear the contents, which start after the used-record header. */
ecode = zero_out(tdb, subhash + sizeof(struct tdb_used_record),
subsize);
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* Remove any which are destined for bucket or are in wrong place. */
num_vals = 0;
for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
unsigned home_bucket = h->group[i] & TDB_OFF_HASH_GROUP_MASK;
/* Empty slots and existing subhash pointers stay where they are. */
if (!h->group[i] || is_subhash(h->group[i]))
continue;
if (home_bucket == bucket || home_bucket != i) {
vals[num_vals++] = h->group[i];
h->group[i] = 0;
}
}
/* FIXME: This assert is valid, but we do this during unit test :( */
/* assert(num_vals); */
/* Overwrite expanded bucket with subhash pointer. */
h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT);
/* Point to actual contents of record. */
subhash += sizeof(struct tdb_used_record);
/* Put values back. */
for (i = 0; i < num_vals; i++) {
unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK;
if (this_bucket == bucket) {
/* Belongs to the expanded bucket: goes down into the subhash. */
ecode = add_to_subhash(tdb, subhash, h->hash_used,
vals[i]);
if (ecode != TDB_SUCCESS)
return ecode;
} else {
/* There should be room to put this back. */
force_into_group(h->group, this_bucket, vals[i]);
}
}
return TDB_SUCCESS;
}
/* Remove the entry at h->found_bucket from its hash group.
 *
 * The slot is zeroed, then the slots after it (circularly, up to the next
 * empty one) are examined: any entry not sitting in its home bucket is
 * taken out and re-inserted starting from its home bucket, so that it can
 * move into the gap just created.  Subhash pointers are left in place.
 *
 * The whole group is then written back at h->group_start; the return
 * value is that of tdb_write_convert().
 */
enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h)
{
unsigned int i, num_movers = 0;
tdb_off_t movers[1 << TDB_HASH_GROUP_BITS];
h->group[h->found_bucket] = 0;
/* Start at 1: offset 0 is the slot we just emptied. */
for (i = 1; i < (1 << TDB_HASH_GROUP_BITS); i++) {
unsigned this_bucket;
this_bucket = (h->found_bucket+i) % (1 << TDB_HASH_GROUP_BITS);
/* Empty bucket? We're done. */
if (!h->group[this_bucket])
break;
/* Ignore subhashes. */
if (is_subhash(h->group[this_bucket]))
continue;
/* If this one is not happy where it is, we'll move it. */
if ((h->group[this_bucket] & TDB_OFF_HASH_GROUP_MASK)
!= this_bucket) {
movers[num_movers++] = h->group[this_bucket];
h->group[this_bucket] = 0;
}
}
/* Put back the ones we erased. */
for (i = 0; i < num_movers; i++) {
/* Each mover's own slot was zeroed above, so there is room. */
force_into_group(h->group, movers[i] & TDB_OFF_HASH_GROUP_MASK,
movers[i]);
}
/* Now we write back the hash group */
return tdb_write_convert(tdb, h->group_start,
h->group, sizeof(h->group));
}
/* Insert the record at new_off into the hash, at the position described
 * by h (as left by a preceding lookup that did not find the key).
 *
 * Cases, in order:
 *  - the lookup stopped on an empty bucket: store there and write the
 *    group back;
 *  - h->hash_used > 64: we are in a chain, so append with add_to_chain();
 *  - otherwise the group is full: expand_group() turns one bucket into a
 *    subhash.  If that was our home bucket, the modified group is written
 *    back and h is moved down into the matching group of the new subhash.
 *    Then we try to place the entry; if the group we ended up in is full
 *    too, we recurse to expand again.
 *
 * h is updated in place (group, group_start, home_bucket, hash bits used).
 * Returns TDB_SUCCESS or the first error encountered.
 */
enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h,
tdb_off_t new_off)
{
enum TDB_ERROR ecode;
/* We hit an empty bucket during search? That's where it goes. */
if (!h->group[h->found_bucket]) {
h->group[h->found_bucket] = encode_offset(new_off, h);
/* Write back the modified group. */
return tdb_write_convert(tdb, h->group_start,
h->group, sizeof(h->group));
}
/* Past the last hash level: h->group_start is a chain's contents. */
if (h->hash_used > 64)
return add_to_chain(tdb, h->group_start, new_off);
/* We're full. Expand. */
ecode = expand_group(tdb, h);
if (ecode != TDB_SUCCESS) {
return ecode;
}
if (is_subhash(h->group[h->home_bucket])) {
/* We were expanded! */
tdb_off_t hashtable;
unsigned int gnum;
/* Write back the modified group. */
ecode = tdb_write_convert(tdb, h->group_start, h->group,
sizeof(h->group));
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* Move hashinfo down a level. */
/* The subhash contents start after its used-record header. */
hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
+ sizeof(struct tdb_used_record);
gnum = use_bits(h,TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
h->group_start = hashtable
+ gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
ecode = tdb_read_convert(tdb, h->group_start, &h->group,
sizeof(h->group));
if (ecode != TDB_SUCCESS) {
return ecode;
}
}
/* Expanding the group must have made room if it didn't choose this
* bucket. */
if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){
return tdb_write_convert(tdb, h->group_start,
h->group, sizeof(h->group));
}
/* This can happen if all hashes in group (and us) dropped into same
* group in subhash. */
return add_to_hash(tdb, h, new_off);
}
/* Traverse support: returns offset of record, or 0 or -ve error.
 *
 * tinfo->levels[] is used as a stack describing where the traversal is:
 * each level holds the offset of a table (hashtable), the index to resume
 * scanning from (entry) and the number of slots in it (total_buckets).
 * The deepest level in use is levels[num_levels-1].
 *
 * Scans for the next non-zero slot.  A record entry is returned (and
 * remembered in tinfo->prev); a subhash entry pushes a new level and the
 * scan continues there.  When a level is exhausted we either follow the
 * `next' link (chain levels), pop back to the parent, or - at the top
 * level - return 0 to say this table is finished.
 */
static tdb_off_t iterate_hash(struct tdb_context *tdb,
struct traverse_info *tinfo)
{
tdb_off_t off, val, i;
struct traverse_level *tlevel;
tlevel = &tinfo->levels[tinfo->num_levels-1];
again:
/* tdb_find_nonzero_off() returns total_buckets when nothing is left. */
for (i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
tlevel->entry, tlevel->total_buckets);
i != tlevel->total_buckets;
i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
i+1, tlevel->total_buckets)) {
if (TDB_OFF_IS_ERR(i)) {
return i;
}
val = tdb_read_off(tdb, tlevel->hashtable+sizeof(tdb_off_t)*i);
if (TDB_OFF_IS_ERR(val)) {
return val;
}
off = val & TDB_OFF_MASK;
/* This makes the delete-all-in-traverse case work
* (and simplifies our logic a little). */
/* We resume at the slot returned last time, so skip it if it is
* still there. */
if (off == tinfo->prev)
continue;
tlevel->entry = i;
if (!is_subhash(val)) {
/* Found one. */
tinfo->prev = off;
return off;
}
/* When we come back, we want the next one */
tlevel->entry++;
/* Descend: push a level for the subhash's contents. */
tinfo->num_levels++;
tlevel++;
tlevel->hashtable = off + sizeof(struct tdb_used_record);
tlevel->entry = 0;
/* Next level is a chain? */
if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1))
tlevel->total_buckets = (1 << TDB_HASH_GROUP_BITS);
else
tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
goto again;
}
/* Nothing there? */
if (tinfo->num_levels == 1)
return 0;
/* Handle chained entries. */
if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) {
tlevel->hashtable = tdb_read_off(tdb, tlevel->hashtable
+ offsetof(struct tdb_chain,
next));
if (TDB_OFF_IS_ERR(tlevel->hashtable)) {
return tlevel->hashtable;
}
if (tlevel->hashtable) {
/* The link holds a record offset; skip its header. */
tlevel->hashtable += sizeof(struct tdb_used_record);
tlevel->entry = 0;
goto again;
}
}
/* Go back up and keep searching. */
tinfo->num_levels--;
tlevel--;
goto again;
}
/* Return success if we find something, TDB_ERR_NOEXIST if none.
 *
 * Advances the traversal in tinfo to the next record.  Top-level groups
 * are visited one at a time; each is read-locked while iterate_hash()
 * looks for the next entry in it and while the record is read.
 *
 * On success kbuf->dsize is the key length and kbuf->dptr a buffer
 * obtained from tdb_alloc_read() holding the key; if dlen is non-NULL
 * the buffer also holds the data straight after the key and *dlen is set
 * to the data length.  (Presumably the caller frees kbuf->dptr - confirm
 * against callers.)
 *
 * NOTE(review): chainlock() takes its lock size from hlock_range(); here
 * it is computed as 1ULL << group_bits.  Confirm both agree with what
 * tdb_lock_hashes() expects.
 */
enum TDB_ERROR next_in_hash(struct tdb_context *tdb,
struct traverse_info *tinfo,
TDB_DATA *kbuf, size_t *dlen)
{
const unsigned group_bits = TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS;
tdb_off_t hl_start, hl_range, off;
enum TDB_ERROR ecode;
while (tinfo->toplevel_group < (1 << group_bits)) {
/* The group number forms the top group_bits bits of the hash. */
hl_start = (tdb_off_t)tinfo->toplevel_group
<< (64 - group_bits);
hl_range = 1ULL << group_bits;
ecode = tdb_lock_hashes(tdb, hl_start, hl_range, F_RDLCK,
TDB_LOCK_WAIT);
if (ecode != TDB_SUCCESS) {
return ecode;
}
off = iterate_hash(tdb, tinfo);
/* Non-zero is either a record offset or an encoded error. */
if (off) {
struct tdb_used_record rec;
if (TDB_OFF_IS_ERR(off)) {
ecode = off;
goto fail;
}
ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
if (ecode != TDB_SUCCESS) {
goto fail;
}
if (rec_magic(&rec) != TDB_USED_MAGIC) {
ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
TDB_LOG_ERROR,
"next_in_hash:"
" corrupt record at %llu",
(long long)off);
goto fail;
}
kbuf->dsize = rec_key_length(&rec);
/* They want data as well? */
if (dlen) {
*dlen = rec_data_length(&rec);
kbuf->dptr = tdb_alloc_read(tdb,
off + sizeof(rec),
kbuf->dsize
+ *dlen);
} else {
kbuf->dptr = tdb_alloc_read(tdb,
off + sizeof(rec),
kbuf->dsize);
}
/* Drop the lock before looking at the read result. */
tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
if (TDB_PTR_IS_ERR(kbuf->dptr)) {
return TDB_PTR_ERR(kbuf->dptr);
}
return TDB_SUCCESS;
}
/* This group is exhausted: unlock it and move to the next one. */
tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
tinfo->toplevel_group++;
tinfo->levels[0].hashtable
+= (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
tinfo->levels[0].entry = 0;
}
return TDB_ERR_NOEXIST;
fail:
tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
return ecode;
}
/* Start a traversal: reset tinfo to the first group of the top-level
 * hash table in the header, then fetch the first record exactly as
 * next_in_hash() would (same outputs and return values). */
enum TDB_ERROR first_in_hash(struct tdb_context *tdb,
			     struct traverse_info *tinfo,
			     TDB_DATA *kbuf, size_t *dlen)
{
	struct traverse_level *top = &tinfo->levels[0];

	/* Level 0 walks one top-level group at a time. */
	top->hashtable = offsetof(struct tdb_header, hashtable);
	top->entry = 0;
	top->total_buckets = (1 << TDB_HASH_GROUP_BITS);

	tinfo->num_levels = 1;
	tinfo->toplevel_group = 0;
	tinfo->prev = 0;

	return next_in_hash(tdb, tinfo, kbuf, dlen);
}
/* Lock the top-level hash group that key hashes to.  Whether or not the
 * key is actually stored, this is the range anyone would have to lock to
 * look for it.
 *
 * ltype is the lock type (callers here pass F_RDLCK or F_WRLCK), waitflag
 * is passed through to tdb_lock_hashes(), and func is the public API name
 * recorded in the trace.  Returns the result of tdb_lock_hashes(). */
static enum TDB_ERROR chainlock(struct tdb_context *tdb, const TDB_DATA *key,
				int ltype, enum tdb_lock_flags waitflag,
				const char *func)
{
	const unsigned int group_bits
		= TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
	uint64_t hash = tdb_hash(tdb, key->dptr, key->dsize);
	tdb_off_t range_start, range_len;
	unsigned int group_num;
	enum TDB_ERROR ecode;

	/* The group is selected by the top group_bits bits of the hash. */
	group_num = bits_from(hash, 64 - group_bits, group_bits);
	range_start = hlock_range(group_num, &range_len);

	ecode = tdb_lock_hashes(tdb, range_start, range_len, ltype, waitflag);
	tdb_trace_1rec(tdb, func, *key);
	return ecode;
}
/* Lock/unlock one hash chain.  This is meant to be used to reduce
 * contention - it cannot guarantee how many records will be locked.
 *
 * Takes a blocking write lock on the chain for key; the result is also
 * stored in tdb->last_error. */
enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
{
	tdb->last_error = chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
				    "tdb_chainlock");
	return tdb->last_error;
}
/* Release the write lock on the hash chain for key: recomputes the same
 * top-level group range that chainlock() derives from the key's hash,
 * records the call in the trace, and unlocks it. */
void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
{
	const unsigned int group_bits
		= TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
	uint64_t hash = tdb_hash(tdb, key.dptr, key.dsize);
	tdb_off_t range_start, range_len;
	unsigned int group_num;

	group_num = bits_from(hash, 64 - group_bits, group_bits);
	range_start = hlock_range(group_num, &range_len);

	tdb_trace_1rec(tdb, "tdb_chainunlock", key);
	tdb_unlock_hashes(tdb, range_start, range_len, F_WRLCK);
}
/* Take a blocking read lock on the hash chain for key.  The result is
 * returned and also stored in tdb->last_error. */
enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
{
	tdb->last_error = chainlock(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
				    "tdb_chainlock_read");
	return tdb->last_error;
}
/* Release the read lock on the hash chain for key: recomputes the same
 * top-level group range that chainlock() derives from the key's hash,
 * records the call in the trace, and unlocks it. */
void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
{
	const unsigned int group_bits
		= TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
	uint64_t hash = tdb_hash(tdb, key.dptr, key.dsize);
	tdb_off_t range_start, range_len;
	unsigned int group_num;

	group_num = bits_from(hash, 64 - group_bits, group_bits);
	range_start = hlock_range(group_num, &range_len);

	tdb_trace_1rec(tdb, "tdb_chainunlock_read", key);
	tdb_unlock_hashes(tdb, range_start, range_len, F_RDLCK);
}

615
lib/tdb2/io.c Normal file
View File

@ -0,0 +1,615 @@
/*
Unix SMB/CIFS implementation.
trivial database library
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
Copyright (C) Rusty Russell 2010
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/likely/likely.h>
/* Drop the memory mapping of file, if it has one.  Nothing is done when
 * the file has no descriptor (fd == -1) or is not currently mapped. */
void tdb_munmap(struct tdb_file *file)
{
	if (file->fd != -1 && file->map_ptr) {
		munmap(file->map_ptr, file->map_size);
		file->map_ptr = NULL;
	}
}
/* (Re)map the database file into memory.
 *
 * Does nothing for TDB_INTERNAL or TDB_NOMMAP databases.  Otherwise the
 * first tdb->file->map_size bytes of the file are mapped MAP_SHARED with
 * protection tdb->mmap_flags.  Failure is not fatal: map_ptr is left NULL
 * and a warning is logged.
 */
void tdb_mmap(struct tdb_context *tdb)
{
	if (tdb->flags & TDB_INTERNAL)
		return;

	if (tdb->flags & TDB_NOMMAP)
		return;

	/* size_t can be smaller than off_t. */
	if ((size_t)tdb->file->map_size == tdb->file->map_size) {
		tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
					  tdb->mmap_flags,
					  MAP_SHARED, tdb->file->fd, 0);
	} else {
		/* mmap() was never called, so errno is stale here: set it so
		 * that the warning below reports a meaningful reason. */
		errno = ENOMEM;
		tdb->file->map_ptr = MAP_FAILED;
	}

	/*
	 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
	 */
	if (tdb->file->map_ptr == MAP_FAILED) {
		tdb->file->map_ptr = NULL;
		tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
			   "tdb_mmap failed for size %lld (%s)",
			   (long long)tdb->file->map_size, strerror(errno));
	}
}
/* Check for an out of bounds access - if it is out of bounds then
 * see if the database has been expanded by someone else and remap
 * if necessary.
 *
 * Note that "len" is the minimum length needed for the db.  When "probe"
 * is true a failure is not logged.
 *
 * Returns TDB_SUCCESS if the first len bytes are (now) within the file,
 * TDB_ERR_IO if the file really is too short (or fstat fails), or the
 * error from taking the expansion lock.
 */
static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
			      bool probe)
{
	struct stat st;
	enum TDB_ERROR ecode;

	/* We can't hold pointers during this: we could unmap! */
	assert(!tdb->direct_access
	       || (tdb->flags & TDB_NOLOCK)
	       || tdb_has_expansion_lock(tdb));

	if (len <= tdb->file->map_size)
		return TDB_SUCCESS;

	/* An internal database cannot have been grown behind our back. */
	if (tdb->flags & TDB_INTERNAL) {
		if (!probe) {
			tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				 "tdb_oob len %lld beyond internal"
				 " malloc size %lld",
				 (long long)len,
				 (long long)tdb->file->map_size);
		}
		return TDB_ERR_IO;
	}

	/* Hold the expansion lock while we look at the file size. */
	ecode = tdb_lock_expand(tdb, F_RDLCK);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	if (fstat(tdb->file->fd, &st) != 0) {
		tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
			   "Failed to fstat file: %s", strerror(errno));
		tdb_unlock_expand(tdb, F_RDLCK);
		return TDB_ERR_IO;
	}

	tdb_unlock_expand(tdb, F_RDLCK);

	/* Compare at full offset width: size_t may be narrower than both
	 * tdb_off_t and off_t, and truncating len could wrongly pass. */
	if ((tdb_off_t)st.st_size < len) {
		if (!probe) {
			tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				   "tdb_oob len %llu beyond eof at %llu",
				   (unsigned long long)len,
				   (unsigned long long)st.st_size);
		}
		return TDB_ERR_IO;
	}

	/* Unmap, update size, remap */
	tdb_munmap(tdb->file);

	tdb->file->map_size = st.st_size;
	tdb_mmap(tdb);
	return TDB_SUCCESS;
}
/* Endian conversion: we only ever deal with 8 byte quantities.
 *
 * If the database has TDB_CONVERT set and buf is non-NULL, byte-swap the
 * size/8 64-bit words in buf in place.  size must be a multiple of 8.
 * Always returns buf. */
void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
{
	uint64_t *word, *end;

	assert(size % 8 == 0);

	if (likely(!(tdb->flags & TDB_CONVERT)) || !buf)
		return buf;

	word = (uint64_t *)buf;
	end = word + size / 8;
	while (word != end) {
		*word = bswap_64(*word);
		word++;
	}
	return buf;
}
/* Return first non-zero offset in offset array, or end, or -ve error.
 *
 * The array of offsets lives at base; indices start..end-1 are examined
 * and the index (not the file offset) of the first non-zero entry is
 * returned. */
/* FIXME: Return the off? */
uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
			      tdb_off_t base, uint64_t start, uint64_t end)
{
	const uint64_t count = end - start;
	const uint64_t *entries;
	uint64_t idx = 0;

	/* Zero vs non-zero is the same unconverted: minor optimization. */
	entries = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
				  count * sizeof(tdb_off_t), false);
	if (TDB_PTR_IS_ERR(entries)) {
		return TDB_PTR_ERR(entries);
	}

	while (idx < count && !entries[idx])
		idx++;

	tdb_access_release(tdb, entries);
	return start + idx;
}
/* Return first zero offset in num offset array, or num, or -ve error.
 *
 * The array of num offsets starts at file offset off; the value returned
 * is an index into that array. */
uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
			   uint64_t num)
{
	const uint64_t *entries;
	uint64_t idx = 0;

	/* Zero vs non-zero is the same unconverted: minor optimization. */
	entries = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
	if (TDB_PTR_IS_ERR(entries)) {
		return TDB_PTR_ERR(entries);
	}

	while (idx < num && entries[idx])
		idx++;

	tdb_access_release(tdb, entries);
	return idx;
}
/* Fill len bytes of the database starting at off with zeroes.
 *
 * Uses direct access to the region when the I/O methods can provide it;
 * otherwise writes a zeroed buffer through methods->twrite in pieces.
 * Returns TDB_SUCCESS or the first error from direct()/twrite(). */
enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
{
	char zeroes[8192] = { 0 };
	void *mapped = tdb->methods->direct(tdb, off, len, true);
	enum TDB_ERROR ecode;

	assert(!tdb->read_only);

	if (TDB_PTR_IS_ERR(mapped)) {
		return TDB_PTR_ERR(mapped);
	}
	if (mapped) {
		memset(mapped, 0, len);
		return TDB_SUCCESS;
	}

	/* No direct access: write the zeroes out a buffer-full at a time. */
	while (len) {
		unsigned chunk = sizeof(zeroes);

		if (len < sizeof(zeroes))
			chunk = len;
		ecode = tdb->methods->twrite(tdb, off, zeroes, chunk);
		if (ecode != TDB_SUCCESS) {
			return ecode;
		}
		off += chunk;
		len -= chunk;
	}
	return TDB_SUCCESS;
}
/* Read one offset-sized value stored at file offset off.
 *
 * Without TDB_CONVERT the value is read through direct access when
 * available; otherwise it is read (and endian-converted if needed) with
 * tdb_read_convert().  On failure the error code is returned in place of
 * the value; callers test for that with TDB_OFF_IS_ERR(). */
tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
{
	tdb_off_t value;
	enum TDB_ERROR ecode;

	if (likely(!(tdb->flags & TDB_CONVERT))) {
		tdb_off_t *mapped = tdb->methods->direct(tdb, off,
							 sizeof(*mapped),
							 false);
		if (TDB_PTR_IS_ERR(mapped)) {
			return TDB_PTR_ERR(mapped);
		}
		if (mapped != NULL)
			return *mapped;
	}

	ecode = tdb_read_convert(tdb, off, &value, sizeof(value));
	if (ecode == TDB_SUCCESS) {
		return value;
	}
	return ecode;
}
/* Write a lump of data at a specified offset.
 *
 * Fails with TDB_ERR_RDONLY on a read-only database and with the oob
 * method's error if off + len is out of bounds.  The data is copied into
 * the mapping when there is one, otherwise written with pwrite(); a
 * short or failed pwrite() is logged and reported as TDB_ERR_IO. */
static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
				const void *buf, tdb_len_t len)
{
	enum TDB_ERROR ecode;
	ssize_t written;

	if (tdb->read_only) {
		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
				  "Write to read-only database");
	}

	ecode = tdb->methods->oob(tdb, off + len, 0);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	if (tdb->file->map_ptr) {
		memcpy(off + (char *)tdb->file->map_ptr, buf, len);
		return TDB_SUCCESS;
	}

	written = pwrite(tdb->file->fd, buf, len, off);
	if (written == len) {
		return TDB_SUCCESS;
	}

	/* This shouldn't happen: we avoid sparse files. */
	if (written >= 0)
		errno = ENOSPC;

	return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
			  "tdb_write: %zi at %zu len=%zu (%s)",
			  written, (size_t)off, (size_t)len,
			  strerror(errno));
}
/* Read a lump of data at a specified offset.
 *
 * Fails with the oob method's error if off + len is out of bounds.  The
 * data is copied out of the mapping when there is one, otherwise read
 * with pread(); a short or failed pread() is logged and reported as
 * TDB_ERR_IO. */
static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
			       void *buf, tdb_len_t len)
{
	enum TDB_ERROR ecode;
	ssize_t got;

	ecode = tdb->methods->oob(tdb, off + len, 0);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	if (tdb->file->map_ptr) {
		memcpy(buf, off + (char *)tdb->file->map_ptr, len);
		return TDB_SUCCESS;
	}

	got = pread(tdb->file->fd, buf, len, off);
	if (got == len) {
		return TDB_SUCCESS;
	}

	return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
			  "tdb_read failed with %zi at %zu "
			  "len=%zu (%s) map_size=%zu",
			  got, (size_t)off, (size_t)len,
			  strerror(errno),
			  (size_t)tdb->file->map_size);
}
/* Write len bytes from rec at offset off via methods->twrite.
 *
 * When the database has TDB_CONVERT set, a byte-swapped temporary copy is
 * written instead, so rec itself is never modified.  Returns the write's
 * result, or TDB_ERR_OOM if the temporary copy cannot be allocated. */
enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
				 const void *rec, size_t len)
{
	enum TDB_ERROR ecode;
	void *swapped;

	if (likely(!(tdb->flags & TDB_CONVERT))) {
		return tdb->methods->twrite(tdb, off, rec, len);
	}

	swapped = malloc(len);
	if (!swapped) {
		return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
				  "tdb_write: no memory converting"
				  " %zu bytes", len);
	}
	memcpy(swapped, rec, len);
	ecode = tdb->methods->twrite(tdb, off,
				     tdb_convert(tdb, swapped, len), len);
	free(swapped);
	return ecode;
}
/* Read len bytes at offset off into rec via methods->tread, then run the
 * buffer through tdb_convert().  The conversion step runs regardless of
 * whether the read succeeded; the read's result is what is returned. */
enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
				void *rec, size_t len)
{
	enum TDB_ERROR read_result;

	read_result = tdb->methods->tread(tdb, off, rec, len);
	tdb_convert(tdb, rec, len);
	return read_result;
}
/* Store the offset-sized value val at file offset off.
 *
 * Refused with TDB_ERR_RDONLY on a read-only database.  Without
 * TDB_CONVERT the value is stored through direct access when available;
 * otherwise it goes through tdb_write_convert(). */
enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
			     tdb_off_t off, tdb_off_t val)
{
	tdb_off_t *mapped;

	if (tdb->read_only) {
		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
				  "Write to read-only database");
	}

	if (unlikely(tdb->flags & TDB_CONVERT)) {
		return tdb_write_convert(tdb, off, &val, sizeof(val));
	}

	mapped = tdb->methods->direct(tdb, off, sizeof(*mapped), true);
	if (TDB_PTR_IS_ERR(mapped)) {
		return TDB_PTR_ERR(mapped);
	}
	if (!mapped) {
		return tdb_write_convert(tdb, off, &val, sizeof(val));
	}

	*mapped = val;
	return TDB_SUCCESS;
}
/* Allocate prefix + len bytes and read len bytes from file offset
 * `offset' into the buffer, starting prefix bytes in (the first prefix
 * bytes are left for the caller to fill).
 *
 * Returns the malloc()ed buffer, or an error pointer
 * (TDB_ERR_PTR(TDB_ERR_OOM), or the read's error code). */
static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
			     tdb_len_t len, unsigned int prefix)
{
	unsigned char *buf;
	enum TDB_ERROR ecode;

	/* some systems don't like zero length malloc */
	if (prefix + len)
		buf = malloc(prefix + len);
	else
		buf = malloc(1);

	if (!buf) {
		tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
			   "tdb_alloc_read malloc failed len=%zu",
			   (size_t)(prefix + len));
		return TDB_ERR_PTR(TDB_ERR_OOM);
	}

	ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
	if (unlikely(ecode != TDB_SUCCESS)) {
		free(buf);
		return TDB_ERR_PTR(ecode);
	}
	return buf;
}
/* Read a lump of data, allocating the space for it.
 *
 * Returns a malloc()ed buffer holding the len bytes found at `offset',
 * or an error pointer (see _tdb_alloc_read()). */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
{
	const unsigned int no_prefix = 0;

	return _tdb_alloc_read(tdb, offset, len, no_prefix);
}
/* Write len bytes to the file starting at off, by repeatedly pwrite()ing
 * (at most) the first `size' bytes of buf.  A short or failed write is
 * logged and reported as TDB_ERR_IO; a short write is reported as
 * ENOSPC. */
static enum TDB_ERROR fill(struct tdb_context *tdb,
			   const void *buf, size_t size,
			   tdb_off_t off, tdb_len_t len)
{
	size_t chunk;
	ssize_t ret;

	for (; len != 0; len -= chunk, off += chunk) {
		chunk = len > size ? size : len;
		ret = pwrite(tdb->file->fd, buf, chunk, off);
		if (ret == chunk)
			continue;

		if (ret >= 0)
			errno = ENOSPC;

		return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				  "fill failed:"
				  " %zi at %zu len=%zu (%s)",
				  ret, (size_t)off, (size_t)len,
				  strerror(errno));
	}
	return TDB_SUCCESS;
}
/* Expand a file by `addition' bytes.  We prefer to use ftruncate, as
 * that is what posix says to use for mmap expansion.
 *
 * Internal databases just realloc() their buffer.  For real files the
 * mapping is dropped, the file is extended and the new space is written
 * out explicitly, then the file is mapped again at its new size. */
static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
				      tdb_len_t addition)
{
	char filler[8192];
	enum TDB_ERROR ecode;

	if (tdb->read_only) {
		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
				  "Expand on read-only database");
	}

	if (tdb->flags & TDB_INTERNAL) {
		char *grown = realloc(tdb->file->map_ptr,
				      tdb->file->map_size + addition);
		if (!grown) {
			return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
					  "No memory to expand database");
		}
		tdb->file->map_ptr = grown;
		tdb->file->map_size += addition;
		return TDB_SUCCESS;
	}

	/* Unmap before trying to write; old TDB claimed OpenBSD had
	 * problem with this otherwise. */
	tdb_munmap(tdb->file);

	if (ftruncate(tdb->file->fd, tdb->file->map_size + addition) != 0) {
		/* If this fails, we try to fill anyway. */
	}

	/* Now fill the file with something.  This ensures that the
	 * file isn't sparse, which would be very bad if we ran out of
	 * disk.  This must be done with write, not via mmap. */
	memset(filler, 0x43, sizeof(filler));
	ecode = fill(tdb, filler, sizeof(filler), tdb->file->map_size,
		     addition);
	if (ecode != TDB_SUCCESS)
		return ecode;

	tdb->file->map_size += addition;
	tdb_mmap(tdb);
	return TDB_SUCCESS;
}
/* Get read access to len bytes of the database at offset off.
 *
 * Without TDB_CONVERT, a direct pointer is returned when the I/O methods
 * can supply one, and tdb->direct_access is incremented.  Otherwise the
 * data is read into a buffer that is linked onto tdb->access (and
 * endian-converted if `convert' is set).  Either way the result is to be
 * handed back with tdb_access_release().  Returns an error pointer on
 * failure. */
const void *tdb_access_read(struct tdb_context *tdb,
			    tdb_off_t off, tdb_len_t len, bool convert)
{
	struct tdb_access_hdr *hdr;
	void *direct = NULL;

	if (likely(!(tdb->flags & TDB_CONVERT))) {
		direct = tdb->methods->direct(tdb, off, len, false);
		if (TDB_PTR_IS_ERR(direct)) {
			return direct;
		}
	}

	if (direct) {
		tdb->direct_access++;
		return direct;
	}

	/* No direct access: read a copy, preceded by its bookkeeping header. */
	hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
	if (TDB_PTR_IS_ERR(hdr)) {
		return hdr;
	}
	hdr->next = tdb->access;
	tdb->access = hdr;

	if (convert) {
		tdb_convert(tdb, (void *)(hdr + 1), len);
	}
	return hdr + 1;
}
/* Get write access to len bytes of the database at offset off.
 *
 * Refused (error pointer for TDB_ERR_RDONLY) on a read-only database.
 * Without TDB_CONVERT, a direct pointer is returned when the I/O methods
 * can supply one, and tdb->direct_access is incremented.  Otherwise the
 * current contents are read into a buffer linked onto tdb->access which
 * remembers off, len and convert, so that tdb_access_commit() can write
 * it back.  Returns an error pointer on failure. */
void *tdb_access_write(struct tdb_context *tdb,
		       tdb_off_t off, tdb_len_t len, bool convert)
{
	struct tdb_access_hdr *hdr;
	void *direct = NULL;

	if (tdb->read_only) {
		tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
			   "Write to read-only database");
		return TDB_ERR_PTR(TDB_ERR_RDONLY);
	}

	if (likely(!(tdb->flags & TDB_CONVERT))) {
		direct = tdb->methods->direct(tdb, off, len, true);
		if (TDB_PTR_IS_ERR(direct)) {
			return direct;
		}
	}

	if (direct) {
		tdb->direct_access++;
		return direct;
	}

	hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
	if (TDB_PTR_IS_ERR(hdr)) {
		return hdr;
	}
	hdr->next = tdb->access;
	tdb->access = hdr;
	hdr->off = off;
	hdr->len = len;
	hdr->convert = convert;

	if (convert)
		tdb_convert(tdb, (void *)(hdr + 1), len);
	return hdr + 1;
}
/* Find the access buffer whose data area (the bytes right after its
 * header) is p.  Returns the address of the list link that points at its
 * header, so the caller can unlink it, or NULL if p is not one of the
 * buffers on tdb->access. */
static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
{
	struct tdb_access_hdr **link = &tdb->access;

	while (*link) {
		if (*link + 1 == p)
			return link;
		link = &(*link)->next;
	}
	return NULL;
}
/* Finish with a pointer obtained from tdb_access_read().  A buffered copy
 * is unlinked from tdb->access and freed; anything else is taken to be a
 * direct pointer, so tdb->direct_access is decremented. */
void tdb_access_release(struct tdb_context *tdb, const void *p)
{
	struct tdb_access_hdr **link = find_hdr(tdb, p);
	struct tdb_access_hdr *hdr;

	if (!link) {
		tdb->direct_access--;
		return;
	}

	hdr = *link;
	*link = hdr->next;
	free(hdr);
}
/* Finish with a pointer obtained from tdb_access_write().
 *
 * A buffered copy is written back to the offset/length recorded in its
 * header (endian-converted if it was requested with convert), then
 * unlinked and freed; the write's result is returned.  Anything else is
 * taken to be a direct pointer: tdb->direct_access is decremented and
 * TDB_SUCCESS returned. */
enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p)
{
	struct tdb_access_hdr **link = find_hdr(tdb, p);
	struct tdb_access_hdr *hdr;
	enum TDB_ERROR ecode;

	if (!link) {
		tdb->direct_access--;
		return TDB_SUCCESS;
	}

	hdr = *link;
	ecode = hdr->convert
		? tdb_write_convert(tdb, hdr->off, p, hdr->len)
		: tdb_write(tdb, hdr->off, p, hdr->len);
	*link = hdr->next;
	free(hdr);
	return ecode;
}
/* Direct-access method: return a pointer into the mapping for the len
 * bytes at off.
 *
 * Returns NULL when the file is not mapped (the caller must fall back to
 * read/write), or an error pointer when the range fails the bounds check.
 * write_mode is not used by this implementation. */
static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
			bool write_mode)
{
	enum TDB_ERROR ecode;

	if (unlikely(tdb->file->map_ptr == NULL))
		return NULL;

	/* The bounds check may remap, so only read map_ptr afterwards. */
	ecode = tdb_oob(tdb, off + len, true);
	if (likely(ecode == TDB_SUCCESS)) {
		char *base = (char *)tdb->file->map_ptr;

		return base + off;
	}
	return TDB_ERR_PTR(ecode);
}
/* Increment the sequence number stored in the database header.
 *
 * The number is kept non-negative when viewed as a signed 64-bit value:
 * rather than going negative it wraps back to the bottom of the range.
 * Without TDB_CONVERT the header field is updated in place through
 * direct access when available; otherwise it is read, incremented and
 * written back.  Failures are silently ignored (there is no return
 * value). */
void tdb_inc_seqnum(struct tdb_context *tdb)
{
	tdb_off_t seq;

	if (likely(!(tdb->flags & TDB_CONVERT))) {
		int64_t *direct;

		direct = tdb->methods->direct(tdb,
					      offsetof(struct tdb_header,
						       seqnum),
					      sizeof(*direct), true);
		/* direct() reports failure with an error pointer, which
		 * is non-NULL: never dereference that. */
		if (TDB_PTR_IS_ERR(direct))
			return;

		if (likely(direct)) {
			/* Don't let it go negative, even briefly.  Test the
			 * incremented value's sign bit using unsigned
			 * arithmetic, so the check happens before the
			 * store and without signed overflow. */
			if (unlikely(((uint64_t)*direct + 1) >> 63))
				*direct = 0;
			(*direct)++;
			return;
		}
	}

	seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
	if (!TDB_OFF_IS_ERR(seq)) {
		seq++;
		if (unlikely((int64_t)seq < 0))
			seq = 0;
		tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq);
	}
}
/* The default I/O method table, installed by tdb_io_init().
 * The initializer is positional, so the order below must match the
 * member order of struct tdb_methods. */
static const struct tdb_methods io_methods = {
/* called elsewhere in this file as methods->tread */
tdb_read,
/* called elsewhere in this file as methods->twrite */
tdb_write,
/* called elsewhere in this file as methods->oob */
tdb_oob,
/* file expansion hook */
tdb_expand_file,
/* called elsewhere in this file as methods->direct */
tdb_direct,
};
/*
 * Initialise the default methods table: point tdb at io_methods, the
 * plain file/mmap implementation defined in this file.
 */
void tdb_io_init(struct tdb_context *tdb)
{
	tdb->methods = &io_methods;
}

875
lib/tdb2/lock.c Normal file
View File

@ -0,0 +1,875 @@
/*
Unix SMB/CIFS implementation.
trivial database library
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/build_assert/build_assert.h>
/* If we were threaded, we could wait for unlock, but we're not, so fail. */
static enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
{
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
"%s: lock owned by another tdb in this process.",
call);
}
/* If we fork, we no longer really own locks.
 *
 * Returns true if this process may rely on the locks recorded for the
 * file: either none are held, or they were taken by this very pid.
 * Otherwise returns false, logging a usage error on behalf of `call'
 * when `log' is set. */
static bool check_lock_pid(struct tdb_context *tdb,
			   const char *call, bool log)
{
	const bool holds_locks = tdb->file->allrecord_lock.count != 0
		|| tdb->file->num_lockrecs != 0;

	/* No locks? No problem! */
	if (!holds_locks) {
		return true;
	}

	/* No fork? No problem! */
	if (tdb->file->locker == getpid()) {
		return true;
	}

	if (log) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
			   "%s: fork() detected after lock acquisition!"
			   " (%u vs %u)", call, tdb->file->locker, getpid());
	}
	return false;
}
/* Default lock function: take an fcntl() byte-range lock of type rw
 * (F_RDLCK or F_WRLCK) on len bytes starting at off from the beginning
 * of the file.  Blocks (F_SETLKW) if waitflag is set, otherwise returns
 * at once (F_SETLK).  Interrupted calls are retried.  Returns fcntl()'s
 * result; `unused' is ignored. */
int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
		   void *unused)
{
	const int cmd = waitflag ? F_SETLKW : F_SETLK;
	struct flock fl;
	int ret;

	for (;;) {
		fl.l_type = rw;
		fl.l_whence = SEEK_SET;
		fl.l_start = off;
		fl.l_len = len;

		ret = fcntl(fd, cmd, &fl);
		if (ret == 0 || errno != EINTR)
			break;
	}
	return ret;
}
/* Default unlock function: release the fcntl() byte-range lock covering
 * len bytes starting at off from the beginning of the file.  Interrupted
 * calls are retried.  Returns fcntl()'s result; rw and `unused' are
 * ignored. */
int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
{
	struct flock fl;
	int ret;

	for (;;) {
		fl.l_type = F_UNLCK;
		fl.l_whence = SEEK_SET;
		fl.l_start = off;
		fl.l_len = len;

		ret = fcntl(fd, F_SETLKW, &fl);
		if (ret == 0 || errno != EINTR)
			break;
	}
	return ret;
}
/* Lowest-level lock: call the context's lock_fn and keep statistics.
 *
 * If no lock is currently recorded for the file, the calling pid is
 * remembered as the lock holder (see check_lock_pid()).  Returns whatever
 * lock_fn returns. */
static int lock(struct tdb_context *tdb,
		int rw, off_t off, off_t len, bool waitflag)
{
	const bool first_lock = tdb->file->allrecord_lock.count == 0
		&& tdb->file->num_lockrecs == 0;
	int ret;

	if (first_lock) {
		tdb->file->locker = getpid();
	}

	tdb->stats.lock_lowlevel++;
	ret = tdb->lock_fn(tdb->file->fd, rw, off, len, waitflag,
			   tdb->lock_data);

	if (waitflag)
		return ret;

	/* Non-blocking attempt: count it, and whether it failed. */
	tdb->stats.lock_nonblock++;
	if (ret != 0)
		tdb->stats.lock_nonblock_fail++;
	return ret;
}
/* Lowest-level unlock: hand the range to the context's unlock_fn and
 * return its result.
 *
 * The "#if 0" section is a disabled debugging aid: it scans /proc/locks
 * for a POSIX advisory lock owned by this pid starting at `off' and
 * aborts if it is missing or its length or type does not match. */
static int unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
{
#if 0 /* Check they matched up locks and unlocks correctly. */
char line[80];
FILE *locks;
bool found = false;
locks = fopen("/proc/locks", "r");
while (fgets(line, 80, locks)) {
char *p;
int type, start, l;
/* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
p = strchr(line, ':') + 1;
if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY ")))
continue;
/* Skips the prefix just matched: the FLOCK string used here has the
* same length as the POSIX one compared above. */
p += strlen(" FLOCK ADVISORY ");
if (strncmp(p, "READ ", strlen("READ ")) == 0)
type = F_RDLCK;
else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
type = F_WRLCK;
else
abort();
p += 6;
/* Only locks held by this process are of interest. */
if (atoi(p) != getpid())
continue;
p = strchr(strchr(p, ' ') + 1, ' ') + 1;
start = atoi(p);
p = strchr(p, ' ') + 1;
/* A lock to end of file is compared against a length of 0. */
if (strncmp(p, "EOF", 3) == 0)
l = 0;
else
l = atoi(p) - start + 1;
if (off == start) {
if (len != l) {
fprintf(stderr, "Len %u should be %u: %s",
(int)len, l, line);
abort();
}
if (type != rw) {
fprintf(stderr, "Type %s wrong: %s",
rw == F_RDLCK ? "READ" : "WRITE", line);
abort();
}
found = true;
break;
}
}
if (!found) {
fprintf(stderr, "Unlock on %u@%u not found!",
(int)off, (int)len);
abort();
}
fclose(locks);
#endif
return tdb->unlock_fn(tdb->file->fd, rw, off, len, tdb->lock_data);
}
/* A byte range locking function - returns TDB_SUCCESS on success.
This function locks len bytes at the specified offset.
Note that a len of zero means lock to end of file.
Does nothing (successfully) when TDB_NOLOCK is set. A write lock on a
read-only database is refused with TDB_ERR_RDONLY, and a range that does
not fit in size_t with TDB_ERR_IO. Any failure of the lock itself is
returned as TDB_ERR_LOCK; it is logged unless TDB_LOCK_PROBE is set or
errno is EAGAIN or EINTR.
*/
static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
int rw_type, tdb_off_t offset, tdb_off_t len,
enum tdb_lock_flags flags)
{
int ret;
if (tdb->flags & TDB_NOLOCK) {
return TDB_SUCCESS;
}
if (rw_type == F_WRLCK && tdb->read_only) {
return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
"Write lock attempted on read-only database");
}
/* A 32 bit system cannot open a 64-bit file, but it could have
* expanded since then: check here. */
if ((size_t)(offset + len) != offset + len) {
return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
"tdb_brlock: lock on giant offset %llu",
(long long)(offset + len));
}
/* Blocks only if the caller asked for TDB_LOCK_WAIT. */
ret = lock(tdb, rw_type, offset, len, flags & TDB_LOCK_WAIT);
if (ret != 0) {
/* Generic lock error. errno set by fcntl.
* EAGAIN is an expected return from non-blocking
* locks. */
if (!(flags & TDB_LOCK_PROBE)
&& (errno != EAGAIN && errno != EINTR)) {
tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_brlock failed (fd=%d) at"
" offset %zu rw_type=%d flags=%d len=%zu:"
" %s",
tdb->file->fd, (size_t)offset, rw_type,
flags, (size_t)len, strerror(errno));
}
return TDB_ERR_LOCK;
}
return TDB_SUCCESS;
}
/* Release a byte-range lock of len bytes at offset.
 *
 * Does nothing (successfully) when TDB_NOLOCK is set.  Fails with
 * TDB_ERR_LOCK if the locks were taken by a different pid (see
 * check_lock_pid()) or if the low-level unlock fails; the latter is
 * logged. */
static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
				   int rw_type, tdb_off_t offset, size_t len)
{
	if (tdb->flags & TDB_NOLOCK) {
		return TDB_SUCCESS;
	}

	if (!check_lock_pid(tdb, "tdb_brunlock", true))
		return TDB_ERR_LOCK;

	if (unlock(tdb, rw_type, offset, len) != -1) {
		return TDB_SUCCESS;
	}

	return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
			  "tdb_brunlock failed (fd=%d) at offset %zu"
			  " rw_type=%d len=%zu: %s",
			  tdb->file->fd, (size_t)offset, rw_type,
			  (size_t)len, strerror(errno));
}
/*
upgrade a read lock to a write lock. This needs to be handled in a
special way as some OSes (such as solaris) have too conservative
deadlock detection and claim a deadlock when progress can be
made. For those OSes we may loop for a while.
Requires exactly one allrecord lock, held by this tdb context and not
already upgraded. Returns TDB_SUCCESS once the write lock is held,
TDB_ERR_LOCK otherwise.
*/
enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb)
{
int count = 1000;
/* NOTE(review): the name logged here is tdb_transaction_prepare_commit,
* not this function - presumably the public entry point that leads
* here; confirm. */
if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
return TDB_ERR_LOCK;
if (tdb->file->allrecord_lock.count != 1) {
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_allrecord_upgrade failed:"
" count %u too high",
tdb->file->allrecord_lock.count);
}
if (tdb->file->allrecord_lock.off != 1) {
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_allrecord_upgrade failed:"
" already upgraded?");
}
if (tdb->file->allrecord_lock.owner != tdb) {
return owner_conflict(tdb, "tdb_allrecord_upgrade");
}
/* Retry (up to 1000 times) for as long as the failure is EDEADLK. */
while (count--) {
struct timeval tv;
/* Length 0 locks from TDB_HASH_LOCK_START to end of file;
* TDB_LOCK_PROBE keeps tdb_brlock from logging failures. */
if (tdb_brlock(tdb, F_WRLCK,
TDB_HASH_LOCK_START, 0,
TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
tdb->file->allrecord_lock.ltype = F_WRLCK;
tdb->file->allrecord_lock.off = 0;
return TDB_SUCCESS;
}
if (errno != EDEADLK) {
break;
}
/* sleep for as short a time as we can - more portable than usleep() */
tv.tv_sec = 0;
tv.tv_usec = 1;
select(0, NULL, NULL, NULL, &tv);
}
/* EAGAIN and EINTR are not logged. */
if (errno != EAGAIN && errno != EINTR)
tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_allrecord_upgrade failed");
return TDB_ERR_LOCK;
}
/*
 * Look up the lock record for offset in this file's lock record array.
 *
 * Returns the first record whose offset matches.  When owner is non-NULL
 * and that record belongs to a different context, NULL is returned
 * instead.  Returns NULL when no record matches.
 */
static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
				      const struct tdb_context *owner)
{
	unsigned int idx = 0;

	while (idx < tdb->file->num_lockrecs) {
		struct tdb_lock *rec = &tdb->file->lockrecs[idx++];

		if (rec->off != offset)
			continue;

		/* The first record at this offset decides the outcome. */
		return (owner && rec->owner != owner) ? NULL : rec;
	}
	return NULL;
}
enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
{
enum TDB_ERROR ecode;
if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
return TDB_ERR_LOCK;
ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
false);
if (ecode != TDB_SUCCESS) {
return ecode;
}
ecode = tdb_lock_open(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
if (ecode != TDB_SUCCESS) {
tdb_allrecord_unlock(tdb, F_WRLCK);
return ecode;
}
ecode = tdb_transaction_recover(tdb);
tdb_unlock_open(tdb, F_WRLCK);
tdb_allrecord_unlock(tdb, F_WRLCK);
return ecode;
}
/*
 * Lock a single offset (one byte of lock space) in the database, with
 * nesting.
 *
 * Held locks are recorded in tdb->file->lockrecs.  If a record for offset
 * already exists and is owned by this tdb, only its count is incremented.
 * Otherwise the record array is grown by one slot, the byte is locked via
 * tdb_brlock(), and a new record with count 1 is appended.
 *
 * Unless TDB_LOCK_NOCHECK is set, taking the very first lock also asks
 * tdb_needs_recovery(); if recovery is needed the lock is dropped,
 * tdb_lock_and_recover() is run, and the lock is taken again.
 *
 * An offset beyond the hash lock range plus map_size / 8 is rejected.
 * With TDB_NOLOCK the function returns TDB_SUCCESS right after that check.
 */
static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
tdb_off_t offset, int ltype,
enum tdb_lock_flags flags)
{
struct tdb_lock *new_lck;
enum TDB_ERROR ecode;
/* Sanity check: offset must lie within the lock space for this file size. */
if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
+ tdb->file->map_size / 8)) {
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_nest_lock: invalid offset %zu ltype=%d",
(size_t)offset, ltype);
}
if (tdb->flags & TDB_NOLOCK)
return TDB_SUCCESS;
if (!check_lock_pid(tdb, "tdb_nest_lock", true)) {
return TDB_ERR_LOCK;
}
tdb->stats.locks++;
/* Look for an existing record regardless of owner (owner == NULL). */
new_lck = find_nestlock(tdb, offset, NULL);
if (new_lck) {
if (new_lck->owner != tdb) {
return owner_conflict(tdb, "tdb_nest_lock");
}
/* A held read lock is never silently promoted to a write lock. */
if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_nest_lock:"
" offset %zu has read lock",
(size_t)offset);
}
/* Just increment the struct, posix locks don't stack. */
new_lck->count++;
return TDB_SUCCESS;
}
#if 0
if (tdb->file->num_lockrecs
&& offset >= TDB_HASH_LOCK_START
&& offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_nest_lock: already have a hash lock?");
}
#endif
/* Grow the record array first so that nothing can fail once the lock
 * is held; the new slot only becomes live when num_lockrecs is bumped. */
new_lck = (struct tdb_lock *)realloc(
tdb->file->lockrecs,
sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
if (new_lck == NULL) {
return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
"tdb_nest_lock:"
" unable to allocate %zu lock struct",
tdb->file->num_lockrecs + 1);
}
tdb->file->lockrecs = new_lck;
/* Since fcntl locks don't nest, we do a lock for the first one,
and simply bump the count for future ones */
ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* First time we grab a lock, perhaps someone died in commit? */
if (!(flags & TDB_LOCK_NOCHECK)
&& tdb->file->num_lockrecs == 0) {
tdb_bool_err berr = tdb_needs_recovery(tdb);
if (berr != false) {
/* Either an error or recovery is needed: release our lock first. */
tdb_brunlock(tdb, ltype, offset, 1);
/* Negative values are error codes and are returned as-is. */
if (berr < 0)
return berr;
ecode = tdb_lock_and_recover(tdb);
if (ecode == TDB_SUCCESS) {
/* Recovery done: take the originally requested lock again. */
ecode = tdb_brlock(tdb, ltype, offset, 1,
flags);
}
if (ecode != TDB_SUCCESS) {
return ecode;
}
}
}
/* Lock is held: fill in the spare slot and make it live. */
tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
tdb->file->num_lockrecs++;
return TDB_SUCCESS;
}
/*
 * Release one nesting level of the lock this tdb holds at off.
 *
 * While the record's count is above one only the count is decremented.
 * The final release unlocks the byte via tdb_brunlock() and removes the
 * record from the array.  Returns an error when this tdb has no lock
 * recorded at off; otherwise the result of the unlock.
 */
static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
				      tdb_off_t off, int ltype)
{
	struct tdb_lock *held;
	enum TDB_ERROR result;

	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	held = find_nestlock(tdb, off, tdb);
	if (!held || !held->count) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_nest_unlock: no lock for %zu",
				  (size_t)off);
	}

	/* Still nested: only the bookkeeping changes. */
	if (held->count > 1) {
		held->count--;
		return TDB_SUCCESS;
	}

	/*
	 * Last reference: release the lock itself.  The record's count is
	 * not decremented because the slot is overwritten just below.
	 */
	result = tdb_brunlock(tdb, ltype, off, 1);

	/* Compact the array by moving the last record into the freed slot. */
	tdb->file->num_lockrecs--;
	*held = tdb->file->lockrecs[tdb->file->num_lockrecs];

	return result;
}
/*
 * Get the transaction lock: a waiting nested lock of type ltype at
 * TDB_TRANSACTION_LOCK.  Returns the result of tdb_nest_lock().
 */
enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
{
return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
}
/*
 * Release the transaction lock taken by tdb_transaction_lock().
 * Any error from tdb_nest_unlock() is not propagated to the caller.
 */
void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
{
tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
}
/*
 * Lock the range [off, off + len) piece by piece.
 *
 * Only individual bytes need locking, but Linux merges consecutive locks,
 * so contiguous ranges are used.  The whole range is first tried without
 * waiting; if that fails it is split in two and each half is locked
 * recursively with the caller's flags.  If the second half fails, the
 * first half is released again.  len must not be zero.
 */
static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
				       int ltype, enum tdb_lock_flags flags,
				       tdb_off_t off, tdb_off_t len)
{
	enum tdb_lock_flags try_flags = (flags & ~TDB_LOCK_WAIT);
	tdb_off_t first_len;
	enum TDB_ERROR status;

	if (len <= 1) {
		/* 0 would mean to end-of-file... */
		assert(len != 0);
		/* Single hash. Just do blocking lock. */
		return tdb_brlock(tdb, ltype, off, len, flags);
	}

	/* Optimistic attempt on the whole range, without waiting. */
	if (tdb_brlock(tdb, ltype, off, len, try_flags) == TDB_SUCCESS)
		return TDB_SUCCESS;

	/* Contended: lock the first half, then the remainder. */
	first_len = len / 2;
	status = tdb_lock_gradual(tdb, ltype, flags, off, first_len);
	if (status != TDB_SUCCESS)
		return status;

	status = tdb_lock_gradual(tdb, ltype, flags,
				  off + first_len, len - first_len);
	if (status != TDB_SUCCESS) {
		/* Back out the half that was already locked. */
		tdb_brunlock(tdb, ltype, off, first_len);
	}
	return status;
}
/*
 * Lock the entire database (hash locks plus everything after them to end
 * of file) with lock type ltype.
 *
 * The lock nests: if this tdb already holds the all-record lock and the
 * request is compatible (a read request, or a write lock is already held)
 * only the count is bumped.  It cannot be combined with existing hash
 * chain locks.
 *
 * upgradable: it can only be upgradable if you have some other way of
 * guaranteeing exclusivity (ie. transaction write lock); it requires
 * ltype == F_RDLCK.
 *
 * Unless TDB_LOCK_NOCHECK is given, the database is checked for a needed
 * recovery once the lock is held; if so, the lock is dropped, recovery is
 * run and the locking is retried.
 */
enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
enum tdb_lock_flags flags, bool upgradable)
{
enum TDB_ERROR ecode;
tdb_bool_err berr;
if (tdb->flags & TDB_NOLOCK)
return TDB_SUCCESS;
if (!check_lock_pid(tdb, "tdb_allrecord_lock", true)) {
return TDB_ERR_LOCK;
}
/* Already locked: either nest or refuse. */
if (tdb->file->allrecord_lock.count) {
if (tdb->file->allrecord_lock.owner != tdb) {
return owner_conflict(tdb, "tdb_allrecord_lock");
}
if (ltype == F_RDLCK
|| tdb->file->allrecord_lock.ltype == F_WRLCK) {
tdb->file->allrecord_lock.count++;
return TDB_SUCCESS;
}
/* a global lock of a different type exists */
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
"tdb_allrecord_lock: already have %s lock",
tdb->file->allrecord_lock.ltype == F_RDLCK
? "read" : "write");
}
if (tdb_has_hash_locks(tdb)) {
/* can't combine global and chain locks */
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
"tdb_allrecord_lock:"
" already have chain lock");
}
if (upgradable && ltype != F_RDLCK) {
/* tdb error: you can't upgrade a write lock! */
return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_allrecord_lock:"
" can't upgrade a write lock");
}
tdb->stats.locks++;
/* Re-entered from the bottom after a recovery has been performed. */
again:
/* Lock hashes, gradually. */
ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
TDB_HASH_LOCK_RANGE);
if (ecode != TDB_SUCCESS)
return ecode;
/* Lock free tables: there to end of file. */
ecode = tdb_brlock(tdb, ltype,
TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
0, flags);
if (ecode != TDB_SUCCESS) {
/* Undo the hash range lock taken above. */
tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
TDB_HASH_LOCK_RANGE);
return ecode;
}
tdb->file->allrecord_lock.owner = tdb;
tdb->file->allrecord_lock.count = 1;
/* If it's upgradable, it's actually exclusive so we can treat
* it as a write lock. */
tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
/* off is reused as the "upgradable" marker for the all-record lock. */
tdb->file->allrecord_lock.off = upgradable;
/* Now check for needing recovery. */
if (flags & TDB_LOCK_NOCHECK)
return TDB_SUCCESS;
berr = tdb_needs_recovery(tdb);
if (likely(berr == false))
return TDB_SUCCESS;
/* Error or recovery needed: give up the lock before going further. */
tdb_allrecord_unlock(tdb, ltype);
/* Negative values are error codes and are returned as-is. */
if (berr < 0)
return berr;
ecode = tdb_lock_and_recover(tdb);
if (ecode != TDB_SUCCESS) {
return ecode;
}
goto again;
}
/* Take the open lock (TDB_OPEN_LOCK) with the given lock type and flags;
 * returns the result of tdb_nest_lock(). */
enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb,
int ltype, enum tdb_lock_flags flags)
{
return tdb_nest_lock(tdb, TDB_OPEN_LOCK, ltype, flags);
}
/* Release the open lock (TDB_OPEN_LOCK); any error from tdb_nest_unlock()
 * is not propagated. */
void tdb_unlock_open(struct tdb_context *tdb, int ltype)
{
tdb_nest_unlock(tdb, TDB_OPEN_LOCK, ltype);
}
/* Report whether this tdb currently holds the open lock.  Always false
 * when locking is disabled with TDB_NOLOCK. */
bool tdb_has_open_lock(struct tdb_context *tdb)
{
	if (tdb->flags & TDB_NOLOCK)
		return false;

	return find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
}
/* Take the expansion lock (TDB_EXPANSION_LOCK), waiting if necessary;
 * returns the result of tdb_nest_lock(). */
enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
{
/* Lock doesn't protect data, so don't check (we recurse if we do!) */
return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
}
/* Release the expansion lock (TDB_EXPANSION_LOCK); any error from
 * tdb_nest_unlock() is not propagated. */
void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
{
tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
}
/* unlock entire db */
void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
{
if (tdb->flags & TDB_NOLOCK)
return;
if (tdb->file->allrecord_lock.count == 0) {
tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
"tdb_allrecord_unlock: not locked!");
return;
}
if (tdb->file->allrecord_lock.owner != tdb) {
tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
"tdb_allrecord_unlock: not locked by us!");
return;
}
/* Upgradable locks are marked as write locks. */
if (tdb->file->allrecord_lock.ltype != ltype
&& (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
"tdb_allrecord_unlock: have %s lock",
tdb->file->allrecord_lock.ltype == F_RDLCK
? "read" : "write");
return;
}
if (tdb->file->allrecord_lock.count > 1) {
tdb->file->allrecord_lock.count--;
return;
}
tdb->file->allrecord_lock.count = 0;
tdb->file->allrecord_lock.ltype = 0;
tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
}
/* Report whether this tdb has a lock record at TDB_EXPANSION_LOCK. */
bool tdb_has_expansion_lock(struct tdb_context *tdb)
{
return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
}
bool tdb_has_hash_locks(struct tdb_context *tdb)
{
unsigned int i;
for (i=0; i<tdb->file->num_lockrecs; i++) {
if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
&& tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
+ TDB_HASH_LOCK_RANGE))
return true;
}
return false;
}
static bool tdb_has_free_lock(struct tdb_context *tdb)
{
unsigned int i;
if (tdb->flags & TDB_NOLOCK)
return false;
for (i=0; i<tdb->file->num_lockrecs; i++) {
if (tdb->file->lockrecs[i].off
> TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
return true;
}
return false;
}
/*
 * Lock the hash chain selected by the top TDB_HASH_LOCK_RANGE_BITS bits
 * of hash_lock.
 *
 * When an all-record lock is held by this tdb no per-chain lock is taken:
 * the request succeeds if it is a read request or matches the held lock
 * type, and fails otherwise.  Chain locks are refused while a free bucket
 * lock or the expansion lock is held.  hash_range is currently unused.
 */
enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
			       tdb_off_t hash_lock,
			       tdb_len_t hash_range,
			       int ltype, enum tdb_lock_flags waitflag)
{
	/* FIXME: Do this properly, using hlock_range */
	unsigned lock_byte;

	/* a allrecord lock allows us to avoid per chain locks */
	if (tdb->file->allrecord_lock.count) {
		if (!check_lock_pid(tdb, "tdb_lock_hashes", true))
			return TDB_ERR_LOCK;

		if (tdb->file->allrecord_lock.owner != tdb)
			return owner_conflict(tdb, "tdb_lock_hashes");

		if (ltype != tdb->file->allrecord_lock.ltype
		    && ltype != F_RDLCK) {
			return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
					  "tdb_lock_hashes:"
					  " already have %s allrecordlock",
					  tdb->file->allrecord_lock.ltype == F_RDLCK
					  ? "read" : "write");
		}
		return TDB_SUCCESS;
	}

	if (tdb_has_free_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_hashes: already have free lock");
	}

	if (tdb_has_expansion_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_hashes:"
				  " already have expansion lock");
	}

	lock_byte = TDB_HASH_LOCK_START
		+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
	return tdb_nest_lock(tdb, lock_byte, ltype, waitflag);
}
/*
 * Release the hash chain lock selected by the top TDB_HASH_LOCK_RANGE_BITS
 * bits of hash_lock.
 *
 * Nothing is done under TDB_NOLOCK.  While an all-record lock is held no
 * per-chain lock exists, so there is nothing to release; asking to drop a
 * write lock under a read-only all-record lock is reported as an error.
 * hash_range is currently unused.
 */
enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
				 tdb_off_t hash_lock,
				 tdb_len_t hash_range, int ltype)
{
	unsigned l = TDB_HASH_LOCK_START
		+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));

	/* Use the named success code, as the other lock functions do. */
	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	/* a allrecord lock allows us to avoid per chain locks */
	if (tdb->file->allrecord_lock.count) {
		if (tdb->file->allrecord_lock.ltype == F_RDLCK
		    && ltype == F_WRLCK) {
			return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
					  "tdb_unlock_hashes RO allrecord!");
		}
		return TDB_SUCCESS;
	}

	return tdb_nest_unlock(tdb, l, ltype);
}
/*
 * Map a free bucket offset to the lock offset that protects it.
 *
 * Hash locks occupy TDB_HASH_LOCK_START plus the following
 * TDB_HASH_LOCK_RANGE offsets; free bucket locks start right after.
 * Bucket offsets are sizeof(tdb_off_t) apart, hence the division.  The
 * result is that on 32 bit systems we don't use lock values > 2^31 on
 * files that are less than 4GB.
 */
static tdb_off_t free_lock_off(tdb_off_t b_off)
{
	const tdb_off_t first_free_lock
		= TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE;

	return first_free_lock + b_off / sizeof(tdb_off_t);
}
/*
 * Take a write lock on the free bucket at b_off.
 *
 * b_off must lie beyond the tdb header.  Nothing is done under
 * TDB_NOLOCK.  While an all-record lock is held no per-bucket lock is
 * taken: a write all-record lock is sufficient, a read one is an error.
 */
enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
				    enum tdb_lock_flags waitflag)
{
	assert(b_off >= sizeof(struct tdb_header));

	/* Use the named success code, as the other lock functions do. */
	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	/* a allrecord lock allows us to avoid per chain locks */
	if (tdb->file->allrecord_lock.count) {
		if (!check_lock_pid(tdb, "tdb_lock_free_bucket", true))
			return TDB_ERR_LOCK;
		if (tdb->file->allrecord_lock.ltype == F_WRLCK)
			return TDB_SUCCESS;
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_free_bucket with"
				  " read-only allrecordlock!");
	}

#if 0 /* FIXME */
	if (tdb_has_expansion_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_free_bucket:"
				  " already have expansion lock");
	}
#endif

	return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
}
/* Release the write lock on the free bucket at b_off.  Nothing is done
 * while an all-record lock is held, since no per-bucket lock was taken
 * in that case. */
void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
{
	if (!tdb->file->allrecord_lock.count)
		tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
}
/* Take a waiting, non-upgradable write lock on the entire database;
 * returns the result of tdb_allrecord_lock(). */
enum TDB_ERROR tdb_lockall(struct tdb_context *tdb)
{
return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
}
/* Release one level of the write lock on the entire database. */
void tdb_unlockall(struct tdb_context *tdb)
{
tdb_allrecord_unlock(tdb, F_WRLCK);
}
/* Take a waiting, non-upgradable read lock on the entire database;
 * returns the result of tdb_allrecord_lock(). */
enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb)
{
return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
}
/* Release one level of the read lock on the entire database. */
void tdb_unlockall_read(struct tdb_context *tdb)
{
tdb_allrecord_unlock(tdb, F_RDLCK);
}
/*
 * Release every lock this tdb still holds on its file: all nesting
 * levels of the all-record lock (if owned by this tdb) and every nested
 * lock record owned by this tdb.  Does nothing when the lock-pid check
 * fails.
 */
void tdb_lock_cleanup(struct tdb_context *tdb)
{
	unsigned int idx;

	/* We don't want to warn: they're allowed to close tdb after fork. */
	if (!check_lock_pid(tdb, "tdb_close", false))
		return;

	while (tdb->file->allrecord_lock.count
	       && tdb->file->allrecord_lock.owner == tdb) {
		tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
	}

	idx = 0;
	while (idx < tdb->file->num_lockrecs) {
		if (tdb->file->lockrecs[idx].owner != tdb) {
			idx++;
			continue;
		}

		/*
		 * Unlocking either drops one nesting level or moves the last
		 * record into this slot, so the same index is examined again.
		 */
		tdb_nest_unlock(tdb,
				tdb->file->lockrecs[idx].off,
				tdb->file->lockrecs[idx].ltype);
	}
}

661
lib/tdb2/open.c Normal file
View File

@ -0,0 +1,661 @@
#include "private.h"
#include <ccan/hash/hash.h>
#include <assert.h>
/* All lock info, kept per file to detect double-opens (fcntl file locks don't nest!) */
static struct tdb_file *files = NULL;
/*
 * Search the list of already-open files for the given device/inode pair.
 * On a match the entry's reference count is incremented and the entry is
 * returned; otherwise NULL is returned.
 */
static struct tdb_file *find_file(dev_t device, ino_t ino)
{
	struct tdb_file *cur = files;

	while (cur != NULL) {
		if (cur->device == device && cur->inode == ino) {
			/* The caller becomes an additional user of this file. */
			cur->refcnt++;
			return cur;
		}
		cur = cur->next;
	}
	return NULL;
}
/*
 * Read exactly len bytes from fd into buf.
 *
 * Short reads are continued until the buffer is full, and a read that is
 * interrupted by a signal (EINTR) is retried.  Returns true on success.
 * Returns false with errno set by read() on error, or with errno set to
 * EWOULDBLOCK when end-of-file is reached before len bytes were read.
 */
static bool read_all(int fd, void *buf, size_t len)
{
	char *dest = buf;

	while (len > 0) {
		ssize_t got = read(fd, dest, len);

		if (got < 0) {
			/* A signal arriving mid-read is not a failure. */
			if (errno == EINTR)
				continue;
			return false;
		}
		if (got == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		dest += got;
		len -= (size_t)got;
	}
	return true;
}
/*
 * Produce a 64-bit number to seed the hash with.
 *
 * Sources are tried in order: /dev/urandom, then /dev/egd-pool, and
 * finally a mix of the process id and the current time.  Using the last
 * fallback is logged as a warning.
 */
static uint64_t random_number(struct tdb_context *tdb)
{
	uint64_t value = 0;
	struct timeval tv;
	int fd;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd >= 0) {
		bool filled = read_all(fd, &value, sizeof(value));

		close(fd);
		if (filled)
			return value;
	}

	/* FIXME: Untested! Based on Wikipedia protocol description! */
	fd = open("/dev/egd-pool", O_RDWR);
	if (fd >= 0) {
		/* Command is 1, next byte is size we want to read. */
		char request[2] = { 1, sizeof(uint64_t) };
		bool complete = false;

		if (write(fd, request, sizeof(request)) == sizeof(request)) {
			char reply[1 + sizeof(uint64_t)];
			int nread = read(fd, reply, sizeof(reply));

			if (nread > 1) {
				/* Copy at least some bytes. */
				memcpy(&value, reply+1, nread - 1);
				/* Only a full-length answer is accepted. */
				complete = (reply[0] == sizeof(uint64_t)
					    && nread == sizeof(reply));
			}
		}
		close(fd);
		if (complete)
			return value;
	}

	/* Fallback: pid and time. */
	gettimeofday(&tv, NULL);
	value = getpid() * 100132289ULL + tv.tv_sec * 1000000ULL + tv.tv_usec;
	tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
		   "tdb_open: random from getpid and time");
	return value;
}
/* In-memory image of a freshly created database: the header followed by
 * a free table.  tdb_new_database() fills one in and writes it out (or
 * copies it for TDB_INTERNAL). */
struct new_database {
/* The database header. */
struct tdb_header hdr;
/* The initial free table; hdr.free_table is set to its offset. */
struct tdb_freetable ftable;
};
/*
 * Initialise a new database.
 *
 * Builds the header and an empty free table in memory and copies the
 * (endian-converted) header to *hdr.  For TDB_INTERNAL the image is copied
 * into a malloc'ed buffer stored in tdb->file->map_ptr; otherwise the file
 * is truncated and the image is written at offset 0.
 *
 * seed: if non-NULL, supplies the hash seed; otherwise random_number() is
 * used.  Returns TDB_SUCCESS, or the error from the failing step.
 */
static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
struct tdb_attribute_seed *seed,
struct tdb_header *hdr)
{
/* We make it up in memory, then write it out if not internal */
struct new_database newdb;
unsigned int magic_len;
ssize_t rlen;
enum TDB_ERROR ecode;
/* Fill in the header */
newdb.hdr.version = TDB_VERSION;
if (seed)
newdb.hdr.hash_seed = seed->seed;
else
newdb.hdr.hash_seed = random_number(tdb);
/* hash_test stores the hash of TDB_HASH_MAGIC under this seed;
 * tdb_open() compares it to detect a different hash function. */
newdb.hdr.hash_test = TDB_HASH_MAGIC;
newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test,
sizeof(newdb.hdr.hash_test),
newdb.hdr.hash_seed,
tdb->hash_data);
newdb.hdr.recovery = 0;
newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK;
newdb.hdr.seqnum = 0;
memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
/* Initial hashes are empty. */
memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
/* Free is empty. */
newdb.hdr.free_table = offsetof(struct new_database, ftable);
memset(&newdb.ftable, 0, sizeof(newdb.ftable));
ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
0);
if (ecode != TDB_SUCCESS) {
return ecode;
}
/* Magic food */
memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
/* This creates an endian-converted database, as if read from disk */
/* The magic string at the start is text and is skipped. */
magic_len = sizeof(newdb.hdr.magic_food);
tdb_convert(tdb,
(char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
*hdr = newdb.hdr;
/* Internal databases live purely in a malloc'ed copy of the image. */
if (tdb->flags & TDB_INTERNAL) {
tdb->file->map_size = sizeof(newdb);
tdb->file->map_ptr = malloc(tdb->file->map_size);
if (!tdb->file->map_ptr) {
return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
"tdb_new_database:"
" failed to allocate");
}
memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size);
return TDB_SUCCESS;
}
/* On-disk database: start from an empty file and write the image. */
if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
"tdb_new_database:"
" failed to seek: %s", strerror(errno));
}
if (ftruncate(tdb->file->fd, 0) == -1) {
return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
"tdb_new_database:"
" failed to truncate: %s", strerror(errno));
}
rlen = write(tdb->file->fd, &newdb, sizeof(newdb));
if (rlen != sizeof(newdb)) {
/* A short write leaves errno untouched: report it as out of space. */
if (rlen >= 0)
errno = ENOSPC;
return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
"tdb_new_database: %zi writing header: %s",
rlen, strerror(errno));
}
return TDB_SUCCESS;
}
/*
 * Allocate a fresh tdb_file for this context and store it in tdb->file.
 *
 * The structure starts zeroed, with no locks, no mapping, no descriptor
 * (fd == -1) and a reference count of one.  The caller fills in the
 * descriptor, identity and mapping fields afterwards.  Returns
 * TDB_ERR_OOM (via tdb_logerr) when the allocation fails.
 */
static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb)
{
	/* Zeroed allocation: the error path of tdb_open() inspects map_ptr
	 * and fd, so no field may be left indeterminate. */
	tdb->file = calloc(1, sizeof(*tdb->file));
	if (!tdb->file)
		return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
				  "tdb_open: cannot alloc tdb_file structure");
	tdb->file->next = NULL;
	tdb->file->fd = -1;
	tdb->file->map_ptr = NULL;
	tdb->file->map_size = 0;
	tdb->file->num_lockrecs = 0;
	tdb->file->lockrecs = NULL;
	tdb->file->allrecord_lock.count = 0;
	tdb->file->refcnt = 1;
	return TDB_SUCCESS;
}
/*
 * Apply a single attribute to an open tdb.
 *
 * The log and flock attributes are copied into the context.  The hash,
 * seed and open hook attributes cannot be changed after opening, and the
 * stats attribute cannot be set; these, like unknown attribute types, are
 * rejected with TDB_ERR_EINVAL, which is also stored in tdb->last_error.
 */
enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
				 const union tdb_attribute *attr)
{
	const char *which;

	switch (attr->base.attr) {
	case TDB_ATTRIBUTE_LOG:
		tdb->log_fn = attr->log.fn;
		tdb->log_data = attr->log.data;
		return TDB_SUCCESS;

	case TDB_ATTRIBUTE_FLOCK:
		tdb->lock_fn = attr->flock.lock;
		tdb->unlock_fn = attr->flock.unlock;
		tdb->lock_data = attr->flock.data;
		return TDB_SUCCESS;

	case TDB_ATTRIBUTE_HASH:
	case TDB_ATTRIBUTE_SEED:
	case TDB_ATTRIBUTE_OPENHOOK:
		/* Name the offending attribute in the message. */
		if (attr->base.attr == TDB_ATTRIBUTE_HASH)
			which = "TDB_ATTRIBUTE_HASH";
		else if (attr->base.attr == TDB_ATTRIBUTE_SEED)
			which = "TDB_ATTRIBUTE_SEED";
		else
			which = "TDB_ATTRIBUTE_OPENHOOK";
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_set_attribute:"
					     " cannot set %s after opening",
					     which);
		return tdb->last_error;

	case TDB_ATTRIBUTE_STATS:
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_set_attribute:"
					     " cannot set TDB_ATTRIBUTE_STATS");
		return tdb->last_error;

	default:
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_set_attribute:"
					     " unknown attribute type %u",
					     attr->base.attr);
		return tdb->last_error;
	}
}
/*
 * Default hash function: hash64_stable() with its two 32-bit halves
 * exchanged.  The last parameter is not used.
 */
static uint64_t jenkins_hash(const void *key, size_t length, uint64_t seed,
			     void *unused)
{
	/* hash64_stable assumes lower bits are more important; they are a
	 * slightly better hash. We use the upper bits first, so swap them. */
	uint64_t h = hash64_stable((const unsigned char *)key, length, seed);
	uint64_t upper = h >> 32;
	uint64_t lower = h << 32;

	return upper | lower;
}
enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
union tdb_attribute *attr)
{
switch (attr->base.attr) {
case TDB_ATTRIBUTE_LOG:
if (!tdb->log_fn)
return tdb->last_error = TDB_ERR_NOEXIST;
attr->log.fn = tdb->log_fn;
attr->log.data = tdb->log_data;
break;
case TDB_ATTRIBUTE_HASH:
attr->hash.fn = tdb->hash_fn;
attr->hash.data = tdb->hash_data;
break;
case TDB_ATTRIBUTE_SEED:
attr->seed.seed = tdb->hash_seed;
break;
case TDB_ATTRIBUTE_OPENHOOK:
return tdb->last_error
= tdb_logerr(tdb, TDB_ERR_EINVAL,
TDB_LOG_USE_ERROR,
"tdb_get_attribute:"
" cannot get TDB_ATTRIBUTE_OPENHOOK");
case TDB_ATTRIBUTE_STATS: {
size_t size = attr->stats.size;
if (size > tdb->stats.size)
size = tdb->stats.size;
memcpy(&attr->stats, &tdb->stats, size);
break;
}
case TDB_ATTRIBUTE_FLOCK:
attr->flock.lock = tdb->lock_fn;
attr->flock.unlock = tdb->unlock_fn;
attr->flock.data = tdb->lock_data;
break;
default:
return tdb->last_error
= tdb_logerr(tdb, TDB_ERR_EINVAL,
TDB_LOG_USE_ERROR,
"tdb_get_attribute:"
" unknown attribute type %u",
attr->base.attr);
}
attr->base.next = NULL;
return TDB_SUCCESS;
}
/*
 * Reset one attribute of an open tdb to its default.
 *
 * Unsetting the log attribute clears the log function; unsetting the
 * flock attribute restores the fcntl-based lock and unlock functions.
 * The hash, seed, open hook and stats attributes cannot be unset, and
 * unknown types are rejected: both cases are only logged, as the function
 * has no return value.
 */
void tdb_unset_attribute(struct tdb_context *tdb,
			 enum tdb_attribute_type type)
{
	switch (type) {
	case TDB_ATTRIBUTE_LOG:
		tdb->log_fn = NULL;
		break;
	case TDB_ATTRIBUTE_HASH:
	case TDB_ATTRIBUTE_SEED:
	case TDB_ATTRIBUTE_OPENHOOK:
		tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
			   "tdb_unset_attribute: cannot unset %s after opening",
			   type == TDB_ATTRIBUTE_HASH
			   ? "TDB_ATTRIBUTE_HASH"
			   : type == TDB_ATTRIBUTE_SEED
			   ? "TDB_ATTRIBUTE_SEED"
			   : "TDB_ATTRIBUTE_OPENHOOK");
		break;
	case TDB_ATTRIBUTE_STATS:
		/* The continuation string needs its leading space, otherwise
		 * the message reads "tdb_unset_attribute:cannot ...". */
		tdb_logerr(tdb, TDB_ERR_EINVAL,
			   TDB_LOG_USE_ERROR,
			   "tdb_unset_attribute:"
			   " cannot unset TDB_ATTRIBUTE_STATS");
		break;
	case TDB_ATTRIBUTE_FLOCK:
		tdb->lock_fn = tdb_fcntl_lock;
		tdb->unlock_fn = tdb_fcntl_unlock;
		break;
	default:
		tdb_logerr(tdb, TDB_ERR_EINVAL,
			   TDB_LOG_USE_ERROR,
			   "tdb_unset_attribute: unknown attribute type %u",
			   type);
	}
}
/*
 * Open (and create if requested) a tdb, returning a new context for it.
 *
 * name        path of the database file; copied into the context so the
 *             logging functions can use it (no name is recorded if NULL).
 * tdb_flags   TDB_* flags; flags outside the set accepted below are
 *             rejected.
 * open_flags  flags for open(2); write-only access is rejected.
 * mode        creation mode for open(2).
 * attr        optional linked list of attributes: hash, seed and open hook
 *             are handled here, everything else by tdb_set_attribute().
 *
 * A file that is already open in this process (same device and inode) is
 * shared rather than opened a second time.  Returns the new context, or
 * NULL with errno set on failure.
 */
struct tdb_context *tdb_open(const char *name, int tdb_flags,
			     int open_flags, mode_t mode,
			     union tdb_attribute *attr)
{
	struct tdb_context *tdb;
	struct stat st;
	int saved_errno = 0;
	uint64_t hash_test;
	unsigned v;
	ssize_t rlen;
	struct tdb_header hdr;
	struct tdb_attribute_seed *seed = NULL;
	struct tdb_attribute_openhook *openhook = NULL;
	tdb_bool_err berr;
	enum TDB_ERROR ecode;
	int openlock;

	/* The name (if any) is stored directly behind the context. */
	tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0));
	if (!tdb) {
		/* Can't log this */
		errno = ENOMEM;
		return NULL;
	}
	/* Set name immediately for logging functions. */
	if (name) {
		tdb->name = strcpy((char *)(tdb + 1), name);
	} else {
		tdb->name = NULL;
	}
	tdb->direct_access = 0;
	tdb->flags = tdb_flags;
	tdb->log_fn = NULL;
	tdb->transaction = NULL;
	tdb->access = NULL;
	tdb->last_error = TDB_SUCCESS;
	tdb->file = NULL;
	tdb->lock_fn = tdb_fcntl_lock;
	tdb->unlock_fn = tdb_fcntl_unlock;
	tdb->hash_fn = jenkins_hash;
	/* The callback data pointers are read back by tdb_get_attribute()
	 * and handed to the callbacks, so give them a defined value even
	 * when no attribute sets them. */
	tdb->log_data = NULL;
	tdb->hash_data = NULL;
	tdb->lock_data = NULL;
	memset(&tdb->stats, 0, sizeof(tdb->stats));
	tdb->stats.base.attr = TDB_ATTRIBUTE_STATS;
	tdb->stats.size = sizeof(tdb->stats);
	tdb_io_init(tdb);

	while (attr) {
		switch (attr->base.attr) {
		case TDB_ATTRIBUTE_HASH:
			tdb->hash_fn = attr->hash.fn;
			tdb->hash_data = attr->hash.data;
			break;
		case TDB_ATTRIBUTE_SEED:
			seed = &attr->seed;
			break;
		case TDB_ATTRIBUTE_OPENHOOK:
			openhook = &attr->openhook;
			break;
		default:
			/* These are set as normal. */
			ecode = tdb_set_attribute(tdb, attr);
			if (ecode != TDB_SUCCESS)
				goto fail;
		}
		attr = attr->base.next;
	}

	if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT
			  | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING)) {
		ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
				   "tdb_open: unknown flags %u", tdb_flags);
		goto fail;
	}

	if ((open_flags & O_ACCMODE) == O_WRONLY) {
		ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
				   "tdb_open: can't open tdb %s write-only",
				   name);
		goto fail;
	}

	if ((open_flags & O_ACCMODE) == O_RDONLY) {
		tdb->read_only = true;
		tdb->mmap_flags = PROT_READ;
		openlock = F_RDLCK;
	} else {
		tdb->read_only = false;
		tdb->mmap_flags = PROT_READ | PROT_WRITE;
		openlock = F_WRLCK;
	}

	/* internal databases don't need any of the rest. */
	if (tdb->flags & TDB_INTERNAL) {
		tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
		ecode = tdb_new_file(tdb);
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}
		tdb->file->fd = -1;
		/* The failure path looks at map_ptr, so it must be defined
		 * even if creating the database fails early. */
		tdb->file->map_ptr = NULL;
		ecode = tdb_new_database(tdb, seed, &hdr);
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}
		tdb_convert(tdb, &hdr.hash_seed, sizeof(hdr.hash_seed));
		tdb->hash_seed = hdr.hash_seed;
		/* Checked just like on the file-backed path below. */
		ecode = tdb_ftable_init(tdb);
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}
		return tdb;
	}

	/* Share the tdb_file if this process already has the file open. */
	if (stat(name, &st) != -1)
		tdb->file = find_file(st.st_dev, st.st_ino);

	if (!tdb->file) {
		int fd;

		if ((fd = open(name, open_flags, mode)) == -1) {
			/* errno set by open(2) */
			saved_errno = errno;
			tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				   "tdb_open: could not open file %s: %s",
				   name, strerror(errno));
			goto fail_errno;
		}

		/* on exec, don't inherit the fd */
		v = fcntl(fd, F_GETFD, 0);
		fcntl(fd, F_SETFD, v | FD_CLOEXEC);

		if (fstat(fd, &st) == -1) {
			saved_errno = errno;
			tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				   "tdb_open: could not stat open %s: %s",
				   name, strerror(errno));
			close(fd);
			goto fail_errno;
		}

		ecode = tdb_new_file(tdb);
		if (ecode != TDB_SUCCESS) {
			close(fd);
			goto fail;
		}

		tdb->file->next = files;
		tdb->file->fd = fd;
		tdb->file->device = st.st_dev;
		tdb->file->inode = st.st_ino;
		tdb->file->map_ptr = NULL;
		tdb->file->map_size = sizeof(struct tdb_header);
	}

	/* ensure there is only one process initialising at once */
	ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
	if (ecode != TDB_SUCCESS) {
		saved_errno = errno;
		goto fail_errno;
	}

	/* call their open hook if they gave us one. */
	if (openhook) {
		ecode = openhook->fn(tdb->file->fd, openhook->data);
		if (ecode != TDB_SUCCESS) {
			tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
				   "tdb_open: open hook failed");
			goto fail;
		}
		open_flags |= O_CREAT;
	}

	/* If they used O_TRUNC, read will return 0. */
	rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0);
	if (rlen == 0 && (open_flags & O_CREAT)) {
		ecode = tdb_new_database(tdb, seed, &hdr);
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}
	} else if (rlen < 0) {
		ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				   "tdb_open: error %s reading %s",
				   strerror(errno), name);
		goto fail;
	} else if ((size_t)rlen < sizeof(hdr)
		   || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
		ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				   "tdb_open: %s is not a tdb file", name);
		goto fail;
	}

	/* A byte-swapped version number means the file has the other
	 * endianness and must be converted on access. */
	if (hdr.version != TDB_VERSION) {
		if (hdr.version == bswap_64(TDB_VERSION))
			tdb->flags |= TDB_CONVERT;
		else {
			/* wrong version */
			ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
					   "tdb_open:"
					   " %s is unknown version 0x%llx",
					   name, (long long)hdr.version);
			goto fail;
		}
	}

	tdb_convert(tdb, &hdr, sizeof(hdr));
	tdb->hash_seed = hdr.hash_seed;
	hash_test = TDB_HASH_MAGIC;
	hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
	if (hdr.hash_test != hash_test) {
		/* wrong hash variant */
		ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				   "tdb_open:"
				   " %s uses a different hash function",
				   name);
		goto fail;
	}

	/* Clear any features we don't understand. */
	if ((open_flags & O_ACCMODE) != O_RDONLY) {
		hdr.features_used &= TDB_FEATURE_MASK;
		/* Keep the result in ecode so that the failure path maps
		 * the real error to errno instead of a stale value. */
		ecode = tdb_write_convert(tdb, offsetof(struct tdb_header,
							features_used),
					  &hdr.features_used,
					  sizeof(hdr.features_used));
		if (ecode != TDB_SUCCESS)
			goto fail;
	}

	tdb_unlock_open(tdb, openlock);

	/* This make sure we have current map_size and mmap. */
	tdb->methods->oob(tdb, tdb->file->map_size + 1, true);

	/* Now it's fully formed, recover if necessary. */
	berr = tdb_needs_recovery(tdb);
	if (unlikely(berr != false)) {
		if (berr < 0) {
			ecode = berr;
			goto fail;
		}
		ecode = tdb_lock_and_recover(tdb);
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}
	}

	ecode = tdb_ftable_init(tdb);
	if (ecode != TDB_SUCCESS) {
		goto fail;
	}

	/* Add to linked list if we're new. */
	if (tdb->file->refcnt == 1)
		files = tdb->file;
	return tdb;

 fail:
	/* Map ecode to some logical errno. */
	switch (ecode) {
	case TDB_ERR_CORRUPT:
	case TDB_ERR_IO:
		saved_errno = EIO;
		break;
	case TDB_ERR_LOCK:
		saved_errno = EWOULDBLOCK;
		break;
	case TDB_ERR_OOM:
		saved_errno = ENOMEM;
		break;
	case TDB_ERR_EINVAL:
		saved_errno = EINVAL;
		break;
	default:
		saved_errno = EINVAL;
		break;
	}

 fail_errno:
#ifdef TDB_TRACE
	close(tdb->tracefd);
#endif
	if (tdb->file) {
		tdb_lock_cleanup(tdb);
		/* Only the last user tears the shared file state down. */
		if (--tdb->file->refcnt == 0) {
			assert(tdb->file->num_lockrecs == 0);
			if (tdb->file->map_ptr) {
				if (tdb->flags & TDB_INTERNAL) {
					free(tdb->file->map_ptr);
				} else
					tdb_munmap(tdb->file);
			}
			/* Internal databases have no descriptor (fd == -1);
			 * closing it would only log a spurious error. */
			if (tdb->file->fd >= 0
			    && close(tdb->file->fd) != 0)
				tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
					   "tdb_open: failed to close tdb fd"
					   " on error: %s", strerror(errno));
			free(tdb->file->lockrecs);
			free(tdb->file);
		}
	}

	free(tdb);
	errno = saved_errno;
	return NULL;
}
/*
 * Close a tdb context and free it.
 *
 * Cancels an active transaction, releases the mapping (or the malloc'ed
 * image of a TDB_INTERNAL database) and drops every lock held by this
 * context.  When this was the last context using the underlying file, the
 * descriptor is closed and the file is removed from the list of open
 * files.
 *
 * Returns the result of close(2) on the descriptor when it was closed
 * here, and 0 otherwise.
 */
int tdb_close(struct tdb_context *tdb)
{
	int ret = 0;

	tdb_trace(tdb, "tdb_close");

	if (tdb->transaction) {
		tdb_transaction_cancel(tdb);
	}

	if (tdb->file) {
		struct tdb_file **i;

		/* Inspect the file only after it is known to exist. */
		if (tdb->file->map_ptr) {
			if (tdb->flags & TDB_INTERNAL)
				free(tdb->file->map_ptr);
			else
				tdb_munmap(tdb->file);
		}

		tdb_lock_cleanup(tdb);
		if (--tdb->file->refcnt == 0) {
			/* TDB_INTERNAL databases have fd == -1: there is no
			 * descriptor to close and that is not a failure. */
			if (tdb->file->fd >= 0)
				ret = close(tdb->file->fd);

			/* Remove from files list */
			for (i = &files; *i; i = &(*i)->next) {
				if (*i == tdb->file) {
					*i = tdb->file->next;
					break;
				}
			}
			free(tdb->file->lockrecs);
			free(tdb->file);
		}
	}

#ifdef TDB_TRACE
	close(tdb->tracefd);
#endif
	free(tdb);

	return ret;
}

614
lib/tdb2/private.h Normal file
View File

@ -0,0 +1,614 @@
#ifndef TDB_PRIVATE_H
#define TDB_PRIVATE_H
/*
Trivial Database 2: private types and prototypes
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#if HAVE_FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stddef.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <utime.h>
#include <unistd.h>
#include <ccan/tdb2/tdb2.h>
#include <ccan/likely/likely.h>
#include <ccan/compiler/compiler.h>
#include <ccan/endian/endian.h>
#ifndef TEST_IT
#define TEST_IT(cond)
#endif
/* #define TDB_TRACE 1 */
#ifndef __STRING
#define __STRING(x) #x
#endif
#ifndef __STRINGSTRING
#define __STRINGSTRING(x) __STRING(x)
#endif
#ifndef __location__
#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
#endif
/* Lengths and offsets are always 64 bits wide. */
typedef uint64_t tdb_len_t;
typedef uint64_t tdb_off_t;
/* Identification string and format version.
 * NOTE(review): presumably what ends up in struct tdb_header's
 * magic_food / version fields - confirm against the open code. */
#define TDB_MAGIC_FOOD "TDB file\n"
#define TDB_VERSION ((uint64_t)(0x26011967 + 7))
/* Record type tags: the 16-bit value rec_magic() extracts from a
 * struct tdb_used_record header. */
#define TDB_USED_MAGIC ((uint64_t)0x1999)
#define TDB_HTABLE_MAGIC ((uint64_t)0x1888)
#define TDB_CHAIN_MAGIC ((uint64_t)0x1777)
#define TDB_FTABLE_MAGIC ((uint64_t)0x1666)
/* Tag in the top TDB_OFF_UPPER_STEAL bits of a free record: see frec_magic(). */
#define TDB_FREE_MAGIC ((uint64_t)0xFE)
/* Input for the header's hash_test field ("result of hashing HASH_MAGIC"). */
#define TDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
/* Values of struct tdb_recovery_record's magic field. */
#define TDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL)
#define TDB_RECOVERY_INVALID_MAGIC (0x0ULL)
/*
 * Errors are carried in-band in offsets and pointers: anything at or
 * above TDB_ERR_LAST (after conversion to the unsigned type) is treated
 * as an error code rather than a real offset/pointer.
 * NOTE(review): this relies on the error codes being negative ("-ve
 * error"), so that they convert to very large unsigned values - confirm
 * against the enum TDB_ERROR definition in tdb2.h.
 */
/* The argument is parenthesized so that compound expressions such as
 * TDB_OFF_IS_ERR(a ? b : c) compare as a whole. */
#define TDB_OFF_IS_ERR(off) unlikely((off) >= (tdb_off_t)TDB_ERR_LAST)
/* Packing errors into pointers and v.v. */
#define TDB_PTR_IS_ERR(ptr) \
	unlikely((unsigned long)(ptr) >= (unsigned long)TDB_ERR_LAST)
/* Recover the error code from a pointer for which TDB_PTR_IS_ERR() held. */
#define TDB_PTR_ERR(p) ((enum TDB_ERROR)(long)(p))
/* Wrap an error code so it can be returned through a pointer. */
#define TDB_ERR_PTR(err) ((void *)(long)(err))
/* Common case of returning true, false or -ve error. */
typedef int tdb_bool_err;
/* Prevent others from opening the file. */
#define TDB_OPEN_LOCK 0
/* Doing a transaction. */
#define TDB_TRANSACTION_LOCK 1
/* Expanding file. */
#define TDB_EXPANSION_LOCK 2
/* Hash chain locks. */
#define TDB_HASH_LOCK_START 64
/* Range for hash locks. */
#define TDB_HASH_LOCK_RANGE_BITS 30
#define TDB_HASH_LOCK_RANGE (1 << TDB_HASH_LOCK_RANGE_BITS)
/* We have 1024 entries in the top level. */
#define TDB_TOPLEVEL_HASH_BITS 10
/* And 64 entries in each sub-level: 10 + 9 * 6 uses up all 64 hash bits
 * after 9 sub-levels. */
#define TDB_SUBLEVEL_HASH_BITS 6
/* And 8 entries in each group, ie 8 groups per sublevel. */
#define TDB_HASH_GROUP_BITS 3
/* This is currently 10: beyond this we chain. */
#define TDB_MAX_LEVELS (1+(64-TDB_TOPLEVEL_HASH_BITS) / TDB_SUBLEVEL_HASH_BITS)
/* Extend the file by at least 100 times more than needed. */
#define TDB_EXTENSION_FACTOR 100
/* We steal bits from the offsets to store hash info. */
#define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1)
/* We steal this many upper bits, leaving 56 bits of offset: a maximum
 * offset of 64 petabytes (2^56 bytes). */
#define TDB_OFF_UPPER_STEAL 8
#define TDB_OFF_UPPER_STEAL_EXTRA 7
/* The bit number where we store extra hash bits. */
#define TDB_OFF_HASH_EXTRA_BIT 57
#define TDB_OFF_UPPER_STEAL_SUBHASH_BIT 56
/* Additional features we understand. Currently: none. */
#define TDB_FEATURE_MASK ((uint64_t)0)
/* Convenience mask to get actual offset: drops the stolen upper bits
 * and the low hash-group bits. */
#define TDB_OFF_MASK \
	(((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK)
/* How many buckets in a free list: see size_to_bucket(). */
#define TDB_FREE_BUCKETS (64 - TDB_OFF_UPPER_STEAL)
/* We have to be able to fit a free record here: a free record header is
 * one 64-bit word larger than a used record header. */
#define TDB_MIN_DATA_LEN \
	(sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record))
/* Indicates this entry is not on an flist (can happen during coalescing) */
#define TDB_FTABLE_NONE ((1ULL << TDB_OFF_UPPER_STEAL) - 1)
/*
 * Header that precedes every in-use record.
 *
 * Bitfields are avoided for on-disk compatibility; magic_and_meta is
 * packed by hand, from the most significant bit down:
 *	magic:         16 bits (bits 48-63)
 *	key_len_bits:   5 bits (bits 43-47)
 *	extra_padding: 32 bits (bits 11-42)
 *	hash_bits:     11 bits (bits  0-10)
 */
struct tdb_used_record {
	uint64_t magic_and_meta;
	/* The bottom key_len_bits*2 are key length, rest is data length. */
	uint64_t key_and_data_len;
};

/* Number of low bits of key_and_data_len that hold the key length
 * (twice the stored 5-bit field). */
static inline unsigned rec_key_bits(const struct tdb_used_record *r)
{
	const uint64_t half_bits = (r->magic_and_meta >> 43) & 0x1F;

	return half_bits * 2;
}

/* Key length: the low rec_key_bits() bits of key_and_data_len. */
static inline uint64_t rec_key_length(const struct tdb_used_record *r)
{
	const uint64_t key_mask = (1ULL << rec_key_bits(r)) - 1;

	return r->key_and_data_len & key_mask;
}

/* Data length: whatever sits above the key length bits. */
static inline uint64_t rec_data_length(const struct tdb_used_record *r)
{
	const unsigned shift = rec_key_bits(r);

	return r->key_and_data_len >> shift;
}

/* The 32-bit extra padding field. */
static inline uint64_t rec_extra_padding(const struct tdb_used_record *r)
{
	const uint64_t above_hash = r->magic_and_meta >> 11;

	return above_hash & 0xFFFFFFFF;
}

/* The 11 hash bits kept in the header. */
static inline uint32_t rec_hash(const struct tdb_used_record *r)
{
	return r->magic_and_meta & 0x7FF;
}

/* The 16-bit record magic from the top of the header. */
static inline uint16_t rec_magic(const struct tdb_used_record *r)
{
	return r->magic_and_meta >> 48;
}
/*
 * Header of a record sitting on a free list.  The top
 * TDB_OFF_UPPER_STEAL bits of the first two words carry a tag (the magic
 * and the free table number respectively); the remaining low bits carry
 * the previous-record offset and the length.
 */
struct tdb_free_record {
	uint64_t magic_and_prev; /* TDB_OFF_UPPER_STEAL bits magic, then prev */
	uint64_t ftable_and_len; /* Len not counting these two fields. */
	/* This is why the minimum record size is 8 bytes. */
	uint64_t next;
};

/* Offset of the previous free record: the low bits of magic_and_prev. */
static inline uint64_t frec_prev(const struct tdb_free_record *f)
{
	const uint64_t low_mask = (1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1;

	return f->magic_and_prev & low_mask;
}

/* Magic tag: the top TDB_OFF_UPPER_STEAL bits of magic_and_prev. */
static inline uint64_t frec_magic(const struct tdb_free_record *f)
{
	const unsigned tag_shift = 64 - TDB_OFF_UPPER_STEAL;

	return f->magic_and_prev >> tag_shift;
}

/* Length of the free record: the low bits of ftable_and_len. */
static inline uint64_t frec_len(const struct tdb_free_record *f)
{
	const uint64_t low_mask = (1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1;

	return f->ftable_and_len & low_mask;
}

/* Free table number: the top TDB_OFF_UPPER_STEAL bits of ftable_and_len. */
static inline unsigned frec_ftable(const struct tdb_free_record *f)
{
	const unsigned tag_shift = 64 - TDB_OFF_UPPER_STEAL;

	return f->ftable_and_len >> tag_shift;
}
/* Header of the transaction recovery area (the header's 'recovery'
 * field is described as the "Transaction recovery area"). */
struct tdb_recovery_record {
	/* TDB_RECOVERY_MAGIC or TDB_RECOVERY_INVALID_MAGIC. */
	uint64_t magic;
	/* Length of record (add this header to get total length). */
	uint64_t max_len;
	/* Length used. */
	uint64_t len;
	/* Old length of file before transaction. */
	uint64_t eof;
};
/* If we bottom out of the subhashes, we chain. */
struct tdb_chain {
	/* One hash group's worth of record offsets. */
	tdb_off_t rec[1 << TDB_HASH_GROUP_BITS];
	/* NOTE(review): presumably the offset of the next chain block, 0 at
	 * the end - confirm against hash.c. */
	tdb_off_t next;
};
/* this is stored at the front of every database */
struct tdb_header {
	char magic_food[64]; /* for /etc/magic */
	/* FIXME: Make me 32 bit? */
	uint64_t version; /* version of the code */
	uint64_t hash_test; /* result of hashing HASH_MAGIC. */
	uint64_t hash_seed; /* "random" seed written at creation time. */
	tdb_off_t free_table; /* (First) free table. */
	tdb_off_t recovery; /* Transaction recovery area. */
	uint64_t features_used; /* Features all writers understand */
	uint64_t features_offered; /* Features offered */
	uint64_t seqnum; /* Sequence number for TDB_SEQNUM */
	tdb_off_t reserved[23];
	/* Top level hash table. */
	tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS];
};
/* A free table: a used-record header, a link to the next free table and
 * one list head per size bucket (TDB_FREE_BUCKETS of them). */
struct tdb_freetable {
	struct tdb_used_record hdr;
	tdb_off_t next;
	tdb_off_t buckets[TDB_FREE_BUCKETS];
};
/* Information about a particular (locked) hash entry. */
struct hash_info {
	/* Full hash value of entry. */
	uint64_t h;
	/* Start and length of lock acquired. */
	tdb_off_t hlock_start;
	tdb_len_t hlock_range;
	/* Start of hash group. */
	tdb_off_t group_start;
	/* Bucket we belong in. */
	unsigned int home_bucket;
	/* Bucket we (or an empty space) were found in. */
	unsigned int found_bucket;
	/* How many bits of the hash are already used. */
	unsigned int hash_used;
	/* Current working group. */
	tdb_off_t group[1 << TDB_HASH_GROUP_BITS];
};
/* Iteration state handed to first_in_hash() / next_in_hash() and
 * optionally filled in by find_and_lock(). */
struct traverse_info {
	/* One entry per hash level, plus one spare slot. */
	struct traverse_level {
		tdb_off_t hashtable;
		/* We ignore groups here, and treat it as a big array. */
		unsigned entry;
		unsigned int total_buckets;
	} levels[TDB_MAX_LEVELS + 1];
	unsigned int num_levels;
	unsigned int toplevel_group;
	/* This makes delete-everything-inside-traverse work as expected. */
	tdb_off_t prev;
};
/* Flags for the locking functions: TDB_LOCK_WAIT selects blocking vs.
 * non-blocking, the remaining values are independent bits. */
enum tdb_lock_flags {
	/* WAIT == F_SETLKW, NOWAIT == F_SETLK */
	TDB_LOCK_NOWAIT = 0,
	TDB_LOCK_WAIT = 1,
	/* If set, don't log an error on failure. */
	TDB_LOCK_PROBE = 2,
	/* If set, don't check for recovery (used by recovery code). */
	TDB_LOCK_NOCHECK = 4,
};
/* One lock record: which context owns it, where it is, how many times
 * it is held and of which type. */
struct tdb_lock {
	struct tdb_context *owner;
	uint32_t off;
	uint32_t count;
	uint32_t ltype;
};
/* This is only needed for tdb_access_commit, but used everywhere to
 * simplify. */
struct tdb_access_hdr {
	struct tdb_access_hdr *next;
	tdb_off_t off;
	tdb_len_t len;
	bool convert;
};
/* State of one underlying file; it carries a reference count so that
 * several tdb_context structures can point at the same tdb_file. */
struct tdb_file {
	/* Single list of all TDBs, to detect multiple opens. */
	struct tdb_file *next;
	/* How many are sharing us? */
	unsigned int refcnt;
	/* Mmap (if any), or malloc (for TDB_INTERNAL). */
	void *map_ptr;
	/* How much space has been mapped (<= current file size) */
	tdb_len_t map_size;
	/* The file descriptor (-1 for TDB_INTERNAL). */
	int fd;
	/* Lock information */
	pid_t locker;
	struct tdb_lock allrecord_lock;
	/* Number of entries in lockrecs. */
	size_t num_lockrecs;
	/* Heap-allocated array of held locks (released with free()). */
	struct tdb_lock *lockrecs;
	/* Identity of this file. */
	dev_t device;
	ino_t inode;
};
/* Per-open state of a database handle. */
struct tdb_context {
	/* Filename of the database. */
	const char *name;
	/* Are we accessing directly? (debugging check). */
	int direct_access;
	/* Operating read-only? (Opened O_RDONLY, or in traverse_read) */
	bool read_only;
	/* mmap read only? */
	int mmap_flags;
	/* the flags passed to tdb_open, for tdb_reopen. */
	uint32_t flags;
	/* Logging function */
	void (*log_fn)(struct tdb_context *tdb,
		       enum tdb_log_level level,
		       const char *message,
		       void *data);
	/* Opaque pointer handed back to log_fn as its last argument. */
	void *log_data;
	/* Hash function. */
	uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *);
	void *hash_data;
	uint64_t hash_seed;
	/* low level (fcntl) lock functions. */
	int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *);
	int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *);
	void *lock_data;
	/* Set if we are in a transaction. */
	struct tdb_transaction *transaction;
	/* What free table are we using? */
	tdb_off_t ftable_off;
	unsigned int ftable;
	/* IO methods: changes for transactions. */
	const struct tdb_methods *methods;
	/* Our statistics. */
	struct tdb_attribute_stats stats;
	/* Direct access information */
	struct tdb_access_hdr *access;
	/* Last error we returned. */
	enum TDB_ERROR last_error;
	/* The actual file information */
	struct tdb_file *file;
};
/* Table of low-level I/O operations, reached through tdb->methods. */
struct tdb_methods {
	/* Read / write 'len' bytes at an offset. */
	enum TDB_ERROR (*tread)(struct tdb_context *, tdb_off_t, void *,
				tdb_len_t);
	enum TDB_ERROR (*twrite)(struct tdb_context *, tdb_off_t, const void *,
				 tdb_len_t);
	/* NOTE(review): name suggests an out-of-bounds check on an offset;
	 * the meaning of the bool argument is not visible here - confirm
	 * against io.c. */
	enum TDB_ERROR (*oob)(struct tdb_context *, tdb_off_t, bool);
	enum TDB_ERROR (*expand_file)(struct tdb_context *, tdb_len_t);
	void *(*direct)(struct tdb_context *, tdb_off_t, size_t, bool);
};
/*
internal prototypes
*/
/* hash.c: */
tdb_bool_err first_in_hash(struct tdb_context *tdb,
struct traverse_info *tinfo,
TDB_DATA *kbuf, size_t *dlen);
tdb_bool_err next_in_hash(struct tdb_context *tdb,
struct traverse_info *tinfo,
TDB_DATA *kbuf, size_t *dlen);
/* Hash random memory. */
uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len);
/* Hash on disk. */
uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off);
/* Find and lock a hash entry (or where it would be). */
tdb_off_t find_and_lock(struct tdb_context *tdb,
struct tdb_data key,
int ltype,
struct hash_info *h,
struct tdb_used_record *rec,
struct traverse_info *tinfo);
enum TDB_ERROR replace_in_hash(struct tdb_context *tdb,
struct hash_info *h,
tdb_off_t new_off);
enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h,
tdb_off_t new_off);
enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h);
/* For tdb_check */
bool is_subhash(tdb_off_t val);
/* free.c: */
enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb);
/* check.c needs these to iterate through free lists. */
tdb_off_t first_ftable(struct tdb_context *tdb);
tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable);
/* This returns space or -ve error number. */
tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
uint64_t hash, unsigned magic, bool growing);
/* Put this record in a free list. */
enum TDB_ERROR add_free_record(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len_with_header,
enum tdb_lock_flags waitflag,
bool coalesce_ok);
/* Set up header for a used/ftable/htable/chain record. */
enum TDB_ERROR set_header(struct tdb_context *tdb,
struct tdb_used_record *rec,
unsigned magic, uint64_t keylen, uint64_t datalen,
uint64_t actuallen, unsigned hashlow);
/* Used by tdb_check to verify. */
unsigned int size_to_bucket(tdb_len_t data_len);
tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket);
/* Used by tdb_summary */
tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off);
/* io.c: */
/* Initialize tdb->methods. */
void tdb_io_init(struct tdb_context *tdb);
/* Convert endian of the buffer if required. */
void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size);
/* Unmap and try to map the tdb. */
void tdb_munmap(struct tdb_file *file);
void tdb_mmap(struct tdb_context *tdb);
/* Either alloc a copy, or give direct access. Release frees or noop. */
const void *tdb_access_read(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len, bool convert);
void *tdb_access_write(struct tdb_context *tdb,
tdb_off_t off, tdb_len_t len, bool convert);
/* Release result of tdb_access_read/write. */
void tdb_access_release(struct tdb_context *tdb, const void *p);
/* Commit result of tdb_access_write. */
enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p);
/* Convenience routine to get an offset. */
tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off);
/* Write an offset at an offset. */
enum TDB_ERROR tdb_write_off(struct tdb_context *tdb, tdb_off_t off,
tdb_off_t val);
/* Clear an ondisk area. */
enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len);
/* Return a non-zero offset between >= start < end in this array (or end). */
tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb,
tdb_off_t base,
uint64_t start,
uint64_t end);
/* Return a zero offset in this array, or num. */
tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
uint64_t num);
/* Allocate and make a copy of some offset. */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
/* Writes a converted copy of a record. */
enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
const void *rec, size_t len);
/* Reads record and converts it */
enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
void *rec, size_t len);
/* Bump the seqnum (caller checks for tdb->flags & TDB_SEQNUM) */
void tdb_inc_seqnum(struct tdb_context *tdb);
/* lock.c: */
/* Lock/unlock a range of hashes. */
enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
tdb_off_t hash_lock, tdb_len_t hash_range,
int ltype, enum tdb_lock_flags waitflag);
enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
tdb_off_t hash_lock,
tdb_len_t hash_range, int ltype);
/* For closing the file. */
void tdb_lock_cleanup(struct tdb_context *tdb);
/* Lock/unlock a particular free bucket. */
enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
enum tdb_lock_flags waitflag);
void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off);
/* Serialize transaction start. */
enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype);
void tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
/* Do we have any hash locks (ie. via tdb_chainlock) ? */
bool tdb_has_hash_locks(struct tdb_context *tdb);
/* Lock entire database. */
enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
enum tdb_lock_flags flags, bool upgradable);
void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype);
enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb);
/* Serialize db open. */
enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb,
int ltype, enum tdb_lock_flags flags);
void tdb_unlock_open(struct tdb_context *tdb, int ltype);
bool tdb_has_open_lock(struct tdb_context *tdb);
/* Serialize db expand. */
enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype);
void tdb_unlock_expand(struct tdb_context *tdb, int ltype);
bool tdb_has_expansion_lock(struct tdb_context *tdb);
/* If it needs recovery, grab all the locks and do it. */
enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb);
/* Default lock and unlock functions. */
int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *);
int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *);
/* transaction.c: */
enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb);
tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb);
/* tdb.c: */
enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
enum TDB_ERROR ecode,
enum tdb_log_level level,
const char *fmt, ...);
#ifdef TDB_TRACE
void tdb_trace(struct tdb_context *tdb, const char *op);
void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op);
void tdb_trace_open(struct tdb_context *tdb, const char *op,
unsigned hash_size, unsigned tdb_flags, unsigned open_flags);
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret);
void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret);
void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
TDB_DATA rec);
void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
TDB_DATA rec, int ret);
void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
TDB_DATA rec, TDB_DATA ret);
void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
int ret);
void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret);
#else
#define tdb_trace(tdb, op)
#define tdb_trace_seqnum(tdb, seqnum, op)
#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags)
#define tdb_trace_ret(tdb, op, ret)
#define tdb_trace_retrec(tdb, op, ret)
#define tdb_trace_1rec(tdb, op, rec)
#define tdb_trace_1rec_ret(tdb, op, rec, ret)
#define tdb_trace_1rec_retrec(tdb, op, rec, ret)
#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret)
#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret)
#endif /* !TDB_TRACE */
#endif

282
lib/tdb2/summary.c Normal file
View File

@ -0,0 +1,282 @@
/*
Trivial Database 2: human-readable summary code
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/tally/tally.h>
/*
 * Count the non-zero entries of the hash table stored at hash_off, which
 * holds (1 << bits) offsets.
 *
 * Returns the count, or an error code packed into the offset (check it
 * with TDB_OFF_IS_ERR()) if the table could not be read.
 */
static tdb_off_t count_hash(struct tdb_context *tdb,
			    tdb_off_t hash_off, unsigned bits)
{
	const tdb_off_t *table;
	tdb_off_t used = 0;
	unsigned int slot;

	table = tdb_access_read(tdb, hash_off, sizeof(*table) << bits, true);
	if (TDB_PTR_IS_ERR(table)) {
		return TDB_PTR_ERR(table);
	}

	for (slot = 0; slot < (1 << bits); slot++) {
		if (table[slot] != 0) {
			used++;
		}
	}

	tdb_access_release(tdb, table);
	return used;
}
/*
 * Walk every record from the end of the header to the end of the mapped
 * file and feed its sizes into the given tallies:
 *	hashes:  used entries of each subhash table
 *	ftables: data length of each free table
 *	fr:      length of each free record
 *	keys / data / extra: key, data and padding length of used records
 *	uncoal:  length of each run of two or more adjacent free records
 *	chains:  one entry per chain record
 *
 * Returns TDB_SUCCESS, or the error from the failing read / helper.  The
 * record handle obtained from tdb_access_read() is released on every
 * path, including the error returns from inside the loop.
 */
static enum TDB_ERROR summarize(struct tdb_context *tdb,
				struct tally *hashes,
				struct tally *ftables,
				struct tally *fr,
				struct tally *keys,
				struct tally *data,
				struct tally *extra,
				struct tally *uncoal,
				struct tally *chains)
{
	tdb_off_t off;
	tdb_len_t len;
	/* Number of consecutive free records seen so far. */
	tdb_len_t unc = 0;

	for (off = sizeof(struct tdb_header);
	     off < tdb->file->map_size;
	     off += len) {
		const union {
			struct tdb_used_record u;
			struct tdb_free_record f;
			struct tdb_recovery_record r;
		} *p;
		/* We might not be able to get the whole thing. */
		p = tdb_access_read(tdb, off, sizeof(p->f), true);
		if (TDB_PTR_IS_ERR(p)) {
			return TDB_PTR_ERR(p);
		}
		if (frec_magic(&p->f) != TDB_FREE_MAGIC) {
			/* A non-free record ends the current run.  Only runs
			 * of two or more are uncoalesced, but the counter
			 * must restart either way or a later, non-adjacent
			 * free record would be counted into this run. */
			if (unc > 1) {
				tally_add(uncoal, unc);
			}
			unc = 0;
		}
		if (p->r.magic == TDB_RECOVERY_INVALID_MAGIC
		    || p->r.magic == TDB_RECOVERY_MAGIC) {
			len = sizeof(p->r) + p->r.max_len;
		} else if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
			len = frec_len(&p->f);
			tally_add(fr, len);
			/* frec_len() excludes the first two header words. */
			len += sizeof(p->u);
			unc++;
		} else if (rec_magic(&p->u) == TDB_USED_MAGIC) {
			len = sizeof(p->u)
				+ rec_key_length(&p->u)
				+ rec_data_length(&p->u)
				+ rec_extra_padding(&p->u);
			tally_add(keys, rec_key_length(&p->u));
			tally_add(data, rec_data_length(&p->u));
			tally_add(extra, rec_extra_padding(&p->u));
		} else if (rec_magic(&p->u) == TDB_HTABLE_MAGIC) {
			tdb_off_t count = count_hash(tdb,
						     off + sizeof(p->u),
						     TDB_SUBLEVEL_HASH_BITS);
			if (TDB_OFF_IS_ERR(count)) {
				/* Don't leak the record handle. */
				tdb_access_release(tdb, p);
				return count;
			}
			tally_add(hashes, count);
			tally_add(extra, rec_extra_padding(&p->u));
			len = sizeof(p->u)
				+ rec_data_length(&p->u)
				+ rec_extra_padding(&p->u);
		} else if (rec_magic(&p->u) == TDB_FTABLE_MAGIC) {
			len = sizeof(p->u)
				+ rec_data_length(&p->u)
				+ rec_extra_padding(&p->u);
			tally_add(ftables, rec_data_length(&p->u));
			tally_add(extra, rec_extra_padding(&p->u));
		} else if (rec_magic(&p->u) == TDB_CHAIN_MAGIC) {
			len = sizeof(p->u)
				+ rec_data_length(&p->u)
				+ rec_extra_padding(&p->u);
			tally_add(chains, 1);
			tally_add(extra, rec_extra_padding(&p->u));
		} else {
			/* Unrecognised contents: let dead_space() size it. */
			len = dead_space(tdb, off);
			if (TDB_OFF_IS_ERR(len)) {
				/* Don't leak the record handle. */
				tdb_access_release(tdb, p);
				return len;
			}
		}
		tdb_access_release(tdb, p);
	}
	/* A run may extend to the very end of the file. */
	if (unc > 1)
		tally_add(uncoal, unc);
	return TDB_SUCCESS;
}
/* printf-style template for the text produced by tdb_summary().  Each
 * "%s" slot takes an optional histogram string (or "" when there is
 * none). */
#define SUMMARY_FORMAT \
	"Size of file/data: %zu/%zu\n" \
	"Number of records: %zu\n" \
	"Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
	"Smallest/average/largest data: %zu/%zu/%zu\n%s" \
	"Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
	"Number of free records: %zu\n" \
	"Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
	"Number of uncoalesced records: %zu\n" \
	"Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
	"Toplevel hash used: %u of %u\n" \
	"Number of chains: %zu\n" \
	"Number of subhashes: %zu\n" \
	"Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
	"Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
/* Per-bucket templates (single bucket / bucket range).
 * NOTE(review): not referenced by tdb_summary() below - check whether
 * anything else still uses them. */
#define BUCKET_SUMMARY_FORMAT_A \
	"Free bucket %zu: total entries %zu.\n" \
	"Smallest/average/largest length: %zu/%zu/%zu\n%s"
#define BUCKET_SUMMARY_FORMAT_B \
	"Free bucket %zu-%zu: total entries %zu.\n" \
	"Smallest/average/largest length: %zu/%zu/%zu\n%s"
/* Width and height passed to tally_histogram(); the height is also the
 * argument given to tally_new(). */
#define HISTO_WIDTH 70
#define HISTO_HEIGHT 20
enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
enum tdb_summary_flags flags,
char **summary)
{
tdb_len_t len;
struct tally *ftables, *hashes, *freet, *keys, *data, *extra, *uncoal,
*chains;
char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg;
enum TDB_ERROR ecode;
hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
if (ecode != TDB_SUCCESS) {
return tdb->last_error = ecode;
}
ecode = tdb_lock_expand(tdb, F_RDLCK);
if (ecode != TDB_SUCCESS) {
tdb_allrecord_unlock(tdb, F_RDLCK);
return tdb->last_error = ecode;
}
/* Start stats off empty. */
ftables = tally_new(HISTO_HEIGHT);
hashes = tally_new(HISTO_HEIGHT);
freet = tally_new(HISTO_HEIGHT);
keys = tally_new(HISTO_HEIGHT);
data = tally_new(HISTO_HEIGHT);
extra = tally_new(HISTO_HEIGHT);
uncoal = tally_new(HISTO_HEIGHT);
chains = tally_new(HISTO_HEIGHT);
if (!ftables || !hashes || !freet || !keys || !data || !extra
|| !uncoal || !chains) {
ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
"tdb_summary: failed to allocate"
" tally structures");
goto unlock;
}
ecode = summarize(tdb, hashes, ftables, freet, keys, data, extra,
uncoal, chains);
if (ecode != TDB_SUCCESS) {
goto unlock;
}
if (flags & TDB_SUMMARY_HISTOGRAMS) {
hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
}
/* 20 is max length of a %llu. */
len = strlen(SUMMARY_FORMAT) + 33*20 + 1
+ (hashesg ? strlen(hashesg) : 0)
+ (freeg ? strlen(freeg) : 0)
+ (keysg ? strlen(keysg) : 0)
+ (datag ? strlen(datag) : 0)
+ (extrag ? strlen(extrag) : 0)
+ (uncoalg ? strlen(uncoalg) : 0);
*summary = malloc(len);
if (!*summary) {
ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
"tdb_summary: failed to allocate string");
goto unlock;
}
sprintf(*summary, SUMMARY_FORMAT,
(size_t)tdb->file->map_size,
tally_total(keys, NULL) + tally_total(data, NULL),
tally_num(keys),
tally_min(keys), tally_mean(keys), tally_max(keys),
keysg ? keysg : "",
tally_min(data), tally_mean(data), tally_max(data),
datag ? datag : "",
tally_min(extra), tally_mean(extra), tally_max(extra),
extrag ? extrag : "",
tally_num(freet),
tally_min(freet), tally_mean(freet), tally_max(freet),
freeg ? freeg : "",
tally_total(uncoal, NULL),
tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
uncoalg ? uncoalg : "",
(unsigned)count_hash(tdb, offsetof(struct tdb_header,
hashtable),
TDB_TOPLEVEL_HASH_BITS),
1 << TDB_TOPLEVEL_HASH_BITS,
tally_num(chains),
tally_num(hashes),
tally_min(hashes), tally_mean(hashes), tally_max(hashes),
hashesg ? hashesg : "",
tally_total(keys, NULL) * 100.0 / tdb->file->map_size,
tally_total(data, NULL) * 100.0 / tdb->file->map_size,
tally_total(extra, NULL) * 100.0 / tdb->file->map_size,
tally_total(freet, NULL) * 100.0 / tdb->file->map_size,
(tally_num(keys) + tally_num(freet) + tally_num(hashes))
* sizeof(struct tdb_used_record) * 100.0 / tdb->file->map_size,
tally_num(ftables) * sizeof(struct tdb_freetable)
* 100.0 / tdb->file->map_size,
(tally_num(hashes)
* (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+ (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)
+ sizeof(struct tdb_chain) * tally_num(chains))
* 100.0 / tdb->file->map_size);
unlock:
free(hashesg);
free(freeg);
free(keysg);
free(datag);
free(extrag);
free(uncoalg);
free(hashes);
free(freet);
free(keys);
free(data);
free(extra);
free(uncoal);
free(ftables);
free(chains);
tdb_allrecord_unlock(tdb, F_RDLCK);
tdb_unlock_expand(tdb, F_RDLCK);
return tdb->last_error = ecode;
}

484
lib/tdb2/tdb.c Normal file
View File

@ -0,0 +1,484 @@
#include "private.h"
#include <ccan/asprintf/asprintf.h>
#include <stdarg.h>
/*
 * Rebuild the header of the used record at 'off' for new key/data
 * lengths, keeping the total room (data + padding) it already had, and
 * write the converted header back.
 *
 * 'rec' is the current header on entry and is overwritten by
 * set_header().  Returns TDB_SUCCESS or the first error encountered.
 */
static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
				     tdb_off_t off,
				     tdb_len_t keylen,
				     tdb_len_t datalen,
				     struct tdb_used_record *rec,
				     uint64_t h)
{
	/* Must be read before set_header() rewrites *rec. */
	const uint64_t room = rec_data_length(rec) + rec_extra_padding(rec);
	enum TDB_ERROR err;

	err = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
			 keylen + room, h);
	if (err != TDB_SUCCESS) {
		return err;
	}
	return tdb_write_convert(tdb, off, rec, sizeof(*rec));
}
/*
 * Store key/dbuf in a freshly allocated record and point the hash entry
 * described by 'h' at it.
 *
 * old_off:  offset of the record being replaced, or 0 when the key is
 *           new; an existing record is put on a free list first.
 * old_room: data + padding room of the old record (used to size the
 *           free record).
 * growing:  passed straight through to alloc().
 *
 * Returns TDB_SUCCESS or the first error encountered.
 */
static enum TDB_ERROR replace_data(struct tdb_context *tdb,
				   struct hash_info *h,
				   struct tdb_data key, struct tdb_data dbuf,
				   tdb_off_t old_off, tdb_len_t old_room,
				   bool growing)
{
	tdb_off_t new_off, key_off;
	enum TDB_ERROR ecode;

	/* Allocate a new record. */
	new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
			growing);
	if (TDB_OFF_IS_ERR(new_off)) {
		return new_off;
	}

	if (!old_off) {
		/* Brand new key. */
		ecode = add_to_hash(tdb, h, new_off);
	} else {
		/* We didn't like the existing one: remove it. */
		tdb->stats.frees++;
		ecode = add_free_record(tdb, old_off,
					sizeof(struct tdb_used_record)
					+ key.dsize + old_room,
					TDB_LOCK_WAIT, true);
		if (ecode == TDB_SUCCESS) {
			ecode = replace_in_hash(tdb, h, new_off);
		}
	}
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	/* The key goes right after the header, the data right after it. */
	key_off = new_off + sizeof(struct tdb_used_record);
	ecode = tdb->methods->twrite(tdb, key_off, key.dptr, key.dsize);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	ecode = tdb->methods->twrite(tdb, key_off + key.dsize,
				     dbuf.dptr, dbuf.dsize);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	if (tdb->flags & TDB_SEQNUM) {
		tdb_inc_seqnum(tdb);
	}
	return TDB_SUCCESS;
}
/*
 * Overwrite record data in place at 'off'.  If 'extra' is non-zero (room
 * is left after the new data) a single zero byte is written after it.
 * The sequence number is bumped whenever TDB_SEQNUM is set, whether or
 * not the writes succeeded.
 *
 * Returns the result of the last write attempted.
 */
static enum TDB_ERROR update_data(struct tdb_context *tdb,
				  tdb_off_t off,
				  struct tdb_data dbuf,
				  tdb_len_t extra)
{
	enum TDB_ERROR ecode = tdb->methods->twrite(tdb, off,
						    dbuf.dptr, dbuf.dsize);

	if (extra && ecode == TDB_SUCCESS) {
		/* Put a zero in; future versions may append other data. */
		ecode = tdb->methods->twrite(tdb, off + dbuf.dsize, "", 1);
	}

	if (tdb->flags & TDB_SEQNUM) {
		tdb_inc_seqnum(tdb);
	}
	return ecode;
}
/*
 * Store dbuf under key.
 *
 * flag: TDB_INSERT fails with TDB_ERR_EXISTS if the key is present;
 *       TDB_MODIFY fails with TDB_ERR_NOEXIST if it is absent; any other
 *       value stores unconditionally.
 *
 * An existing record is rewritten in place when its data + padding room
 * is large enough, otherwise a new record is allocated.  The result is
 * returned and also recorded in tdb->last_error.
 */
enum TDB_ERROR tdb_store(struct tdb_context *tdb,
			 struct tdb_data key, struct tdb_data dbuf, int flag)
{
	struct hash_info h;
	struct tdb_used_record rec;
	tdb_len_t old_room = 0;
	enum TDB_ERROR ecode;
	tdb_off_t off;

	off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		return tdb->last_error = off;
	}

	/* Now we have lock on this hash bucket. */
	if (!off) {
		if (flag == TDB_MODIFY) {
			/* Nothing there to modify: fail the store. */
			ecode = TDB_ERR_NOEXIST;
			goto out;
		}
	} else if (flag == TDB_INSERT) {
		ecode = TDB_ERR_EXISTS;
		goto out;
	} else {
		old_room = rec_data_length(&rec) + rec_extra_padding(&rec);
		if (old_room >= dbuf.dsize) {
			/* Can modify in-place. Easy! */
			ecode = update_rec_hdr(tdb, off,
					       key.dsize, dbuf.dsize,
					       &rec, h.h);
			if (ecode == TDB_SUCCESS) {
				ecode = update_data(tdb,
						    off + sizeof(rec)
						    + key.dsize, dbuf,
						    old_room - dbuf.dsize);
			}
			goto out;
		}
	}

	/* If we didn't use the old record, this implies we're growing. */
	ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);

out:
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
	return tdb->last_error = ecode;
}
/*
 * Append dbuf to the data stored under key, creating the record if the
 * key does not exist yet.
 *
 * When the existing record has enough padding the new bytes are written
 * in place.  Otherwise the old data is read into a temporary buffer, the
 * new data is added behind it, and the combination is stored in a newly
 * allocated record.
 *
 * The result is returned and also recorded in tdb->last_error.
 */
enum TDB_ERROR tdb_append(struct tdb_context *tdb,
			  struct tdb_data key, struct tdb_data dbuf)
{
	struct hash_info h;
	tdb_off_t off;
	struct tdb_used_record rec;
	tdb_len_t old_room = 0, old_dlen;
	/* NULL until the slow path allocates; free(NULL) is a no-op. */
	unsigned char *newdata = NULL;
	struct tdb_data new_dbuf;
	enum TDB_ERROR ecode;

	off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		return tdb->last_error = off;
	}

	if (off) {
		old_dlen = rec_data_length(&rec);
		old_room = old_dlen + rec_extra_padding(&rec);

		/* Fast path: can append in place. */
		if (rec_extra_padding(&rec) >= dbuf.dsize) {
			ecode = update_rec_hdr(tdb, off, key.dsize,
					       old_dlen + dbuf.dsize, &rec,
					       h.h);
			if (ecode != TDB_SUCCESS) {
				goto out;
			}

			off += sizeof(rec) + key.dsize + old_dlen;
			/* rec now holds the updated header, so this is the
			 * padding left after the append. */
			ecode = update_data(tdb, off, dbuf,
					    rec_extra_padding(&rec));
			goto out;
		}

		/* Slow path.  The buffer only ever holds old data followed
		 * by the appended data, so that is all we allocate. */
		newdata = malloc(old_dlen + dbuf.dsize);
		if (!newdata) {
			ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
					   "tdb_append:"
					   " failed to allocate %zu bytes",
					   (size_t)(old_dlen + dbuf.dsize));
			goto out;
		}
		ecode = tdb->methods->tread(tdb, off + sizeof(rec) + key.dsize,
					    newdata, old_dlen);
		if (ecode != TDB_SUCCESS) {
			goto out_free_newdata;
		}
		memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
		new_dbuf.dptr = newdata;
		new_dbuf.dsize = old_dlen + dbuf.dsize;
	} else {
		/* No existing record: the appended data is the whole value. */
		new_dbuf = dbuf;
	}

	/* If they're using tdb_append(), it implies they're growing record. */
	ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);

out_free_newdata:
	free(newdata);
out:
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
	return tdb->last_error = ecode;
}
/*
 * Look up key and, on success, hand back a copy of its data in *data
 * (obtained from tdb_alloc_read()).
 *
 * Returns TDB_SUCCESS, TDB_ERR_NOEXIST if the key is absent, or the error
 * from the lookup / read; the result is also recorded in tdb->last_error.
 */
enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
			 struct tdb_data *data)
{
	struct hash_info h;
	struct tdb_used_record rec;
	enum TDB_ERROR ecode;
	tdb_off_t off;

	off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		return tdb->last_error = off;
	}

	if (off) {
		/* The data follows the record header and the key. */
		data->dsize = rec_data_length(&rec);
		data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
					    data->dsize);
		if (TDB_PTR_IS_ERR(data->dptr)) {
			ecode = TDB_PTR_ERR(data->dptr);
		} else {
			ecode = TDB_SUCCESS;
		}
	} else {
		ecode = TDB_ERR_NOEXIST;
	}

	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
	return tdb->last_error = ecode;
}
/*
 * Report whether key is present.  A failed lookup is reported as false,
 * with the error left in tdb->last_error; otherwise last_error is set to
 * TDB_SUCCESS.
 */
bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
{
	struct hash_info h;
	struct tdb_used_record rec;
	tdb_off_t off;
	bool found;

	off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		tdb->last_error = off;
		return false;
	}

	/* A non-zero offset means the key was found. */
	found = (off != 0);
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
	tdb->last_error = TDB_SUCCESS;
	return found;
}
/*
 * Remove key: unhook it from the hash, then put the whole record (header,
 * key, data and padding) on a free list.
 *
 * Returns TDB_SUCCESS, TDB_ERR_NOEXIST if the key is absent, or the first
 * error encountered; the result is also recorded in tdb->last_error.
 */
enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
{
	struct hash_info h;
	struct tdb_used_record rec;
	enum TDB_ERROR ecode;
	tdb_off_t off;

	off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		return tdb->last_error = off;
	}

	if (!off) {
		ecode = TDB_ERR_NOEXIST;
	} else {
		ecode = delete_from_hash(tdb, &h);
		if (ecode == TDB_SUCCESS) {
			/* Free the deleted entry. */
			tdb->stats.frees++;
			ecode = add_free_record(tdb, off,
						sizeof(struct tdb_used_record)
						+ rec_key_length(&rec)
						+ rec_data_length(&rec)
						+ rec_extra_padding(&rec),
						TDB_LOCK_WAIT, true);

			/* Bumped once the hash entry is gone, whatever
			 * add_free_record() returned. */
			if (tdb->flags & TDB_SEQNUM) {
				tdb_inc_seqnum(tdb);
			}
		}
	}

	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
	return tdb->last_error = ecode;
}
/* Return the current flag word of this database handle (tdb->flags). */
unsigned int tdb_get_flags(struct tdb_context *tdb)
{
	return tdb->flags;
}
/*
 * Turn on a single flag after open.  Only TDB_NOLOCK, TDB_NOMMAP,
 * TDB_NOSYNC, TDB_SEQNUM and TDB_ALLOW_NESTING are accepted; any other
 * value, or any call on a TDB_INTERNAL database, is logged as a usage
 * error and recorded in tdb->last_error.
 */
void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
{
	if (tdb->flags & TDB_INTERNAL) {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_add_flag: internal db");
		return;
	}

	switch (flag) {
	case TDB_NOMMAP:
		tdb->flags |= TDB_NOMMAP;
		/* Stop using the mapping straight away. */
		tdb_munmap(tdb->file);
		break;
	case TDB_NOLOCK:
	case TDB_NOSYNC:
	case TDB_SEQNUM:
	case TDB_ALLOW_NESTING:
		/* Nothing to do beyond recording the flag. */
		tdb->flags |= flag;
		break;
	default:
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_add_flag: Unknown flag %u",
					     flag);
	}
}
/*
 * tdb_remove_flag - turn off a single runtime flag.
 *
 * Accepts TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM or
 * TDB_ALLOW_NESTING.  Anything else, or any call on a TDB_INTERNAL
 * database, is logged as a usage error and recorded in tdb->last_error.
 */
void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
{
	if (tdb->flags & TDB_INTERNAL) {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_remove_flag: internal db");
		return;
	}

	if (flag == TDB_NOMMAP) {
		/* mmap is allowed again, so try to map the file now. */
		tdb->flags &= ~TDB_NOMMAP;
		tdb_mmap(tdb);
	} else if (flag == TDB_NOLOCK) {
		tdb->flags &= ~TDB_NOLOCK;
	} else if (flag == TDB_NOSYNC) {
		tdb->flags &= ~TDB_NOSYNC;
	} else if (flag == TDB_SEQNUM) {
		tdb->flags &= ~TDB_SEQNUM;
	} else if (flag == TDB_ALLOW_NESTING) {
		tdb->flags &= ~TDB_ALLOW_NESTING;
	} else {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
					     TDB_LOG_USE_ERROR,
					     "tdb_remove_flag: Unknown flag %u",
					     flag);
	}
}
/*
 * tdb_errorstr - map an enum TDB_ERROR onto a constant, readable string.
 *
 * Values outside the enum yield "Invalid error code".
 */
const char *tdb_errorstr(enum TDB_ERROR ecode)
{
	const char *text = "Invalid error code";

	/* No default label on purpose: gcc then warns if a case is missed. */
	switch (ecode) {
	case TDB_SUCCESS:
		text = "Success";
		break;
	case TDB_ERR_CORRUPT:
		text = "Corrupt database";
		break;
	case TDB_ERR_IO:
		text = "IO Error";
		break;
	case TDB_ERR_LOCK:
		text = "Locking error";
		break;
	case TDB_ERR_OOM:
		text = "Out of memory";
		break;
	case TDB_ERR_EXISTS:
		text = "Record exists";
		break;
	case TDB_ERR_EINVAL:
		text = "Invalid parameter";
		break;
	case TDB_ERR_NOEXIST:
		text = "Record does not exist";
		break;
	case TDB_ERR_RDONLY:
		text = "write not permitted";
		break;
	}
	return text;
}
enum TDB_ERROR tdb_error(struct tdb_context *tdb)
{
return tdb->last_error;
}
/*
 * tdb_logerr - format a message and pass it to the tdb's log function.
 * @tdb: the tdb context; nothing is logged if it has no log_fn.
 * @ecode: the error code, returned unchanged so callers can write
 *	   "return tdb_logerr(...)".
 * @level: severity handed to the log function.
 * @fmt: printf-style format, followed by its arguments.
 *
 * errno is preserved across the call.  If the message cannot be
 * allocated, an out-of-memory notice is logged followed by the raw
 * format string.
 */
enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
			       enum TDB_ERROR ecode,
			       enum tdb_log_level level,
			       const char *fmt, ...)
{
	char *message;
	va_list ap;
	/* Must be signed: vasprintf() reports failure by returning -1, and
	 * leaves message undefined in that case. */
	int len;
	/* tdb_open paths care about errno, so save it. */
	int saved_errno = errno;

	if (!tdb->log_fn)
		return ecode;

	va_start(ap, fmt);
	len = vasprintf(&message, fmt, ap);
	va_end(ap);

	if (len < 0) {
		tdb->log_fn(tdb, TDB_LOG_ERROR,
			    "out of memory formatting message:", tdb->log_data);
		/* Best effort: at least show the unformatted text. */
		tdb->log_fn(tdb, level, fmt, tdb->log_data);
	} else {
		tdb->log_fn(tdb, level, message, tdb->log_data);
		free(message);
	}
	errno = saved_errno;
	return ecode;
}
/*
 * tdb_parse_record_ - hand a record's data to @parse without copying it out.
 *
 * Looks up @key under a read lock.  If found, the data region is obtained
 * with tdb_access_read() and passed, together with @key and @data, to
 * @parse; whatever @parse returns becomes the result.  Returns
 * TDB_ERR_NOEXIST if the key is absent.  The result is also stored in
 * tdb->last_error.
 */
enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
				 TDB_DATA key,
				 enum TDB_ERROR (*parse)(TDB_DATA k,
							 TDB_DATA d,
							 void *data),
				 void *data)
{
	struct hash_info hinfo;
	struct tdb_used_record used;
	enum TDB_ERROR result = TDB_ERR_NOEXIST;
	tdb_off_t where;

	where = find_and_lock(tdb, key, F_RDLCK, &hinfo, &used, NULL);
	if (TDB_OFF_IS_ERR(where))
		return tdb->last_error = where;

	if (where) {
		/* The data sits after the record header and the key. */
		const void *view = tdb_access_read(tdb,
						   where + sizeof(used)
						   + key.dsize,
						   rec_data_length(&used),
						   false);

		if (TDB_PTR_IS_ERR(view)) {
			result = TDB_PTR_ERR(view);
		} else {
			TDB_DATA value = tdb_mkdata(view,
						    rec_data_length(&used));

			result = parse(key, value, data);
			tdb_access_release(tdb, view);
		}
	}

	tdb_unlock_hashes(tdb, hinfo.hlock_start, hinfo.hlock_range, F_RDLCK);
	return tdb->last_error = result;
}
/* tdb_name - return the name string stored in @tdb. */
const char *tdb_name(const struct tdb_context *tdb)
{
	const char *label = tdb->name;

	return label;
}
/*
 * tdb_get_seqnum - read the sequence number field of the header.
 *
 * Returns the value read; if that value is an error offset it is also
 * stored in tdb->last_error, otherwise last_error becomes TDB_SUCCESS.
 */
int64_t tdb_get_seqnum(struct tdb_context *tdb)
{
	tdb_off_t seq;

	seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
	if (!TDB_OFF_IS_ERR(seq)) {
		tdb->last_error = TDB_SUCCESS;
	} else {
		tdb->last_error = seq;
	}
	return seq;
}
/* tdb_fd - return the descriptor recorded in @tdb's file structure. */
int tdb_fd(const struct tdb_context *tdb)
{
	int descriptor = tdb->file->fd;

	return descriptor;
}

846
lib/tdb2/tdb2.h Normal file
View File

@ -0,0 +1,846 @@
#ifndef CCAN_TDB2_H
#define CCAN_TDB2_H
/*
TDB version 2: trivial database library
Copyright (C) Andrew Tridgell 1999-2004
Copyright (C) Rusty Russell 2010-2011
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifdef __cplusplus
extern "C" {
#endif
#ifndef _SAMBA_BUILD_
/* For mode_t */
#include <sys/types.h>
/* For O_* flags. */
#include <sys/stat.h>
/* For sig_atomic_t. */
#include <signal.h>
/* For uint64_t */
#include <stdint.h>
/* For bool */
#include <stdbool.h>
/* For memcmp */
#include <string.h>
#endif
#include <ccan/compiler/compiler.h>
#include <ccan/typesafe_cb/typesafe_cb.h>
#include <ccan/cast/cast.h>
union tdb_attribute;
struct tdb_context;
/**
* tdb_open - open a database file
* @name: the file name (can be NULL if flags contains TDB_INTERNAL)
* @tdb_flags: options for this database
* @open_flags: flags argument for tdb's open() call.
* @mode: mode argument for tdb's open() call.
* @attributes: linked list of extra attributes for this tdb.
*
* This call opens (and potentially creates) a database file.
* Multiple processes can have the TDB file open at once.
*
* On failure it will return NULL, and set errno: it may also call
* any log attribute found in @attributes.
*
* See also:
* union tdb_attribute
*/
struct tdb_context *tdb_open(const char *name, int tdb_flags,
int open_flags, mode_t mode,
union tdb_attribute *attributes);
/* flags for tdb_open() */
#define TDB_DEFAULT 0 /* just a readability place holder */
#define TDB_INTERNAL 2 /* don't store on disk */
#define TDB_NOLOCK 4 /* don't do any locking */
#define TDB_NOMMAP 8 /* don't use mmap */
#define TDB_CONVERT 16 /* convert endian */
#define TDB_NOSYNC 64 /* don't use synchronous transactions */
#define TDB_SEQNUM 128 /* maintain a sequence number */
#define TDB_ALLOW_NESTING 256 /* fake nested transactions */
/**
* tdb_close - close and free a tdb.
* @tdb: the tdb context returned from tdb_open()
*
* This always succeeds, in that @tdb is unusable after this call. But if
* some unexpected error occurred while closing, it will return non-zero
* (the only clue as to cause will be via the log attribute).
*/
int tdb_close(struct tdb_context *tdb);
/**
* struct tdb_data - representation of keys or values.
* @dptr: the data pointer
* @dsize: the size of the data pointed to by dptr.
*
* This is the "blob" representation of keys and data used by TDB.
*/
typedef struct tdb_data {
unsigned char *dptr;
size_t dsize;
} TDB_DATA;
/**
* enum TDB_ERROR - error returns for TDB
*
* See Also:
* tdb_errorstr()
*/
enum TDB_ERROR {
TDB_SUCCESS = 0, /* No error. */
TDB_ERR_CORRUPT = -1, /* We read the db, and it was bogus. */
TDB_ERR_IO = -2, /* We couldn't read/write the db. */
TDB_ERR_LOCK = -3, /* Locking failed. */
TDB_ERR_OOM = -4, /* Out of Memory. */
TDB_ERR_EXISTS = -5, /* The key already exists. */
TDB_ERR_NOEXIST = -6, /* The key does not exist. */
TDB_ERR_EINVAL = -7, /* You're using it wrong. */
TDB_ERR_RDONLY = -8, /* The database is read-only. */
TDB_ERR_LAST = TDB_ERR_RDONLY
};
/**
* tdb_store - store a key/value pair in a tdb.
* @tdb: the tdb context returned from tdb_open()
* @key: the key
* @dbuf: the data to associate with the key.
* @flag: TDB_REPLACE, TDB_INSERT or TDB_MODIFY.
*
* This inserts (or overwrites) a key/value pair in the TDB. If flag
* is TDB_REPLACE, it doesn't matter whether the key exists or not;
* TDB_INSERT means it must not exist (returns TDB_ERR_EXISTS otherwise),
* and TDB_MODIFY means it must exist (returns TDB_ERR_NOEXIST otherwise).
*
* On success, this returns TDB_SUCCESS.
*
* See also:
* tdb_fetch, tdb_transaction_start, tdb_append, tdb_delete.
*/
enum TDB_ERROR tdb_store(struct tdb_context *tdb,
struct tdb_data key,
struct tdb_data dbuf,
int flag);
/* flags to tdb_store() */
#define TDB_REPLACE 1 /* Store whether or not the key already exists */
#define TDB_INSERT 2 /* Don't overwrite an existing entry */
#define TDB_MODIFY 3 /* Don't create a new entry: the key must exist */
/**
* tdb_fetch - fetch a value from a tdb.
* @tdb: the tdb context returned from tdb_open()
* @key: the key
* @data: pointer to data.
*
* This looks up a key in the database and sets it in @data.
*
* If it returns TDB_SUCCESS, the key was found: it is your
* responsibility to call free() on @data->dptr.
*
* Otherwise, it returns an error (usually, TDB_ERR_NOEXIST) and @data is
* undefined.
*/
enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
struct tdb_data *data);
/**
* tdb_errorstr - map the tdb error onto a constant readable string
* @ecode: the enum TDB_ERROR to map.
*
* This is useful for displaying errors to users.
*/
const char *tdb_errorstr(enum TDB_ERROR ecode);
/**
* tdb_append - append a value to a key/value pair in a tdb.
* @tdb: the tdb context returned from tdb_open()
* @key: the key
* @dbuf: the data to append.
*
* This is equivalent to fetching a record, reallocating .dptr to add the
* data, and writing it back, only it's much more efficient. If the key
* doesn't exist, it's equivalent to tdb_store (with an additional hint that
* you expect to expand the record in future).
*
* See Also:
* tdb_fetch(), tdb_store()
*/
enum TDB_ERROR tdb_append(struct tdb_context *tdb,
struct tdb_data key, struct tdb_data dbuf);
/**
* tdb_delete - delete a key from a tdb.
* @tdb: the tdb context returned from tdb_open()
* @key: the key to delete.
*
* Returns TDB_SUCCESS on success, or an error (usually TDB_ERR_NOEXIST).
*
* See Also:
* tdb_fetch(), tdb_store()
*/
enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key);
/**
* tdb_exists - does a key exist in the database?
* @tdb: the tdb context returned from tdb_open()
* @key: the key to search for.
*
* Returns true if it exists, or false if it doesn't or any other error.
*/
bool tdb_exists(struct tdb_context *tdb, TDB_DATA key);
/**
 * tdb_deq - are struct tdb_data equal?
 * @a: one struct tdb_data
 * @b: another struct tdb_data
 *
 * Two blobs are equal when they have the same length and the same bytes.
 * Zero-length blobs compare equal without their dptr being examined, so
 * a NULL dptr with a dsize of 0 is safe to pass.
 */
static inline bool tdb_deq(struct tdb_data a, struct tdb_data b)
{
	if (a.dsize != b.dsize)
		return false;
	/* memcmp() must not be handed a NULL pointer, even for length 0. */
	return a.dsize == 0 || memcmp(a.dptr, b.dptr, a.dsize) == 0;
}
/**
* tdb_mkdata - make a struct tdb_data from const data
* @p: the constant pointer
* @len: the length
*
* As the dptr member of struct tdb_data is not constant, you need to
* cast it. This function keeps those casts in one place, as well as
* suppressing the warning some compilers give when casting away a
* qualifier (eg. gcc with -Wcast-qual).
*
* The returned struct refers to @p directly; no copy is made.
*/
static inline struct tdb_data tdb_mkdata(const void *p, size_t len)
{
struct tdb_data d;
d.dptr = cast_const(void *, p);
d.dsize = len;
return d;
}
/**
* tdb_transaction_start - start a transaction
* @tdb: the tdb context returned from tdb_open()
*
* This begins a series of atomic operations. Other processes will be able
* to read the tdb, but not alter it (they will block), nor will they see
* any changes until tdb_transaction_commit() is called.
*
* Note that if the TDB_ALLOW_NESTING flag is set, a tdb_transaction_start()
* within a transaction will succeed, but it's not a real transaction:
* (1) An inner transaction which is committed is not actually committed until
* the outer transaction is; if the outer transaction is cancelled, the
* inner ones are discarded.
* (2) tdb_transaction_cancel() marks the outer transaction as having an error,
* so the final tdb_transaction_commit() will fail.
* (3) the outer transaction will see the results of the inner transaction.
*
* See Also:
* tdb_transaction_cancel, tdb_transaction_commit.
*/
enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb);
/**
* tdb_transaction_cancel - abandon a transaction
* @tdb: the tdb context returned from tdb_open()
*
* This aborts a transaction, discarding any changes which were made.
* tdb_close() does this implicitly.
*/
void tdb_transaction_cancel(struct tdb_context *tdb);
/**
* tdb_transaction_commit - commit a transaction
* @tdb: the tdb context returned from tdb_open()
*
* This completes a transaction, writing any changes which were made.
*
* fsync() is used to commit the transaction (unless TDB_NOSYNC is set),
* making it robust against machine crashes, but very slow compared to
* other TDB operations.
*
* A failure can only be caused by unexpected errors (eg. I/O or
* memory); there is no point looping on transaction failure.
*
* See Also:
* tdb_transaction_prepare_commit()
*/
enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb);
/**
* tdb_transaction_prepare_commit - prepare to commit a transaction
* @tdb: the tdb context returned from tdb_open()
*
* This ensures we have the resources to commit a transaction (using
* tdb_transaction_commit): if this succeeds then a transaction will only
* fail if the write() or fsync() calls fail.
*
* If this fails you must still call tdb_transaction_cancel() to cancel
* the transaction.
*
* See Also:
* tdb_transaction_commit()
*/
enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb);
/**
* tdb_traverse - traverse a TDB
* @tdb: the tdb context returned from tdb_open()
* @fn: the function to call for every key/value pair (or NULL)
* @p: the pointer to hand to @fn
*
* This walks the TDB until all the keys have been traversed, or @fn
* returns non-zero. If the traverse function or other processes are
* changing data or adding or deleting keys, the traverse may be
* unreliable: keys may be skipped or (rarely) visited twice.
*
* There is one specific exception: the special case of deleting the
* current key does not undermine the reliability of the traversal.
*
* On success, returns the number of keys iterated. On error returns
* a negative enum TDB_ERROR value.
*/
#define tdb_traverse(tdb, fn, p) \
tdb_traverse_(tdb, typesafe_cb_preargs(int, void *, (fn), (p), \
struct tdb_context *, \
TDB_DATA, TDB_DATA), (p))
int64_t tdb_traverse_(struct tdb_context *tdb,
int (*fn)(struct tdb_context *,
TDB_DATA, TDB_DATA, void *), void *p);
/**
* tdb_parse_record - operate directly on data in the database.
* @tdb: the tdb context returned from tdb_open()
* @key: the key whose record we should hand to @parse
* @parse: the function to call for the data
* @data: the private pointer to hand to @parse (types must match).
*
* This avoids a copy for many cases, by handing you a pointer into
* the memory-mapped database. It also locks the record to prevent
* other accesses at the same time.
*
* Do not alter the data handed to parse()!
*/
#define tdb_parse_record(tdb, key, parse, data) \
tdb_parse_record_((tdb), (key), \
typesafe_cb_preargs(enum TDB_ERROR, void *, \
(parse), (data), \
TDB_DATA, TDB_DATA), (data))
enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
TDB_DATA key,
enum TDB_ERROR (*parse)(TDB_DATA k,
TDB_DATA d,
void *data),
void *data);
/**
* tdb_get_seqnum - get a database sequence number
* @tdb: the tdb context returned from tdb_open()
*
* This returns a sequence number: any change to the database from a
* tdb context opened with the TDB_SEQNUM flag will cause that number
* to increment. Note that the incrementing is unreliable (it is done
* without locking), so this is only useful as an optimization.
*
* For example, you may have a regular database backup routine which
* does not operate if the sequence number is unchanged. In the
* unlikely event of a failed increment, it will be backed up next
* time anyway.
*
* Returns an enum TDB_ERROR (ie. negative) on error.
*/
int64_t tdb_get_seqnum(struct tdb_context *tdb);
/**
* tdb_firstkey - get the "first" key in a TDB
* @tdb: the tdb context returned from tdb_open()
* @key: pointer to key.
*
* This returns an arbitrary key in the database; with tdb_nextkey() it allows
* open-coded traversal of the database, though it is slightly less efficient
* than tdb_traverse.
*
* It is your responsibility to free @key->dptr on success.
*
* Returns TDB_ERR_NOEXIST if the database is empty.
*/
enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key);
/**
* tdb_nextkey - get the "next" key in a TDB
* @tdb: the tdb context returned from tdb_open()
* @key: a key returned by tdb_firstkey() or tdb_nextkey().
*
* This returns another key in the database; it will free @key.dptr for
* your convenience.
*
* Returns TDB_ERR_NOEXIST if there are no more keys.
*/
enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key);
/**
* tdb_chainlock - lock a record in the TDB
* @tdb: the tdb context returned from tdb_open()
* @key: the key to lock.
*
* This prevents any access occurring to a group of keys including @key,
* even if @key does not exist. This allows primitive atomic updates of
* records without using transactions.
*
* You cannot begin a transaction while holding a tdb_chainlock(), nor can
* you do any operations on any other keys in the database. This also means
* that you cannot hold more than one tdb_chainlock() at a time.
*
* See Also:
* tdb_chainunlock()
*/
enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
/**
* tdb_chainunlock - unlock a record in the TDB
* @tdb: the tdb context returned from tdb_open()
* @key: the key to unlock.
*
* The key must have previously been locked by tdb_chainlock().
*/
void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
/**
* tdb_chainlock_read - lock a record in the TDB, for reading
* @tdb: the tdb context returned from tdb_open()
* @key: the key to lock.
*
* This prevents any changes from occurring to a group of keys including @key,
* even if @key does not exist. This allows primitive atomic updates of
* records without using transactions.
*
* You cannot begin a transaction while holding a tdb_chainlock_read(), nor can
* you do any operations on any other keys in the database. This also means
* that you cannot hold more than one tdb_chainlock()/read() at a time.
*
* See Also:
* tdb_chainlock()
*/
enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
/**
* tdb_chainunlock_read - unlock a record in the TDB for reading
* @tdb: the tdb context returned from tdb_open()
* @key: the key to unlock.
*
* The key must have previously been locked by tdb_chainlock_read().
*/
void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
/**
* tdb_lockall - lock the entire TDB
* @tdb: the tdb context returned from tdb_open()
*
* You cannot hold a tdb_chainlock while calling this. It nests, so you
* must call tdb_unlockall as many times as you call tdb_lockall.
*/
enum TDB_ERROR tdb_lockall(struct tdb_context *tdb);
/**
* tdb_unlockall - unlock the entire TDB
* @tdb: the tdb context returned from tdb_open()
*/
void tdb_unlockall(struct tdb_context *tdb);
/**
* tdb_lockall_read - lock the entire TDB for reading
* @tdb: the tdb context returned from tdb_open()
*
* This prevents others writing to the database, eg. tdb_delete, tdb_store,
* tdb_append, but not tdb_fetch.
*
* You cannot hold a tdb_chainlock while calling this. It nests, so you
* must call tdb_unlockall_read as many times as you call tdb_lockall_read.
*/
enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb);
/**
* tdb_unlockall_read - unlock the entire TDB for reading
* @tdb: the tdb context returned from tdb_open()
*/
void tdb_unlockall_read(struct tdb_context *tdb);
/**
* tdb_wipe_all - wipe the database clean
* @tdb: the tdb context returned from tdb_open()
*
* Completely erase the database. This is faster than iterating through
* each key and doing tdb_delete.
*/
enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb);
/**
* tdb_check - check a TDB for consistency
* @tdb: the tdb context returned from tdb_open()
* @check: function to check each key/data pair (or NULL)
* @data: argument for @check, must match type.
*
* This performs a consistency check of the open database, optionally calling
* a check() function on each record so you can do your own data consistency
* checks as well. If check() returns an error, that is returned from
* tdb_check().
*
* Returns TDB_SUCCESS or an error.
*/
#define tdb_check(tdb, check, data) \
tdb_check_((tdb), typesafe_cb_preargs(enum TDB_ERROR, void *, \
(check), (data), \
struct tdb_data, \
struct tdb_data), \
(data))
enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
enum TDB_ERROR (*check)(struct tdb_data k,
struct tdb_data d,
void *data),
void *data);
/**
* tdb_error - get the last error (not threadsafe)
* @tdb: the tdb context returned from tdb_open()
*
* Returns the last error returned by a TDB function.
*
* This makes porting from TDB1 easier, but note that the last error is not
* reliable in threaded programs.
*/
enum TDB_ERROR tdb_error(struct tdb_context *tdb);
/**
* enum tdb_summary_flags - flags for tdb_summary.
*/
enum tdb_summary_flags {
TDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. */
};
/**
* tdb_summary - return a string describing the TDB state
* @tdb: the tdb context returned from tdb_open()
* @flags: flags to control the summary output.
* @summary: pointer to string to allocate.
*
* This returns a developer-readable string describing the overall
* state of the tdb, such as the percentage used and sizes of records.
* It is designed to provide information about the tdb at a glance
* without displaying any keys or data in the database.
*
* On success, sets @summary to point to a malloc()'ed nul-terminated
* multi-line string. It is your responsibility to free() it.
*/
enum TDB_ERROR tdb_summary(struct tdb_context *tdb,
enum tdb_summary_flags flags,
char **summary);
/**
* tdb_get_flags - return the flags for a tdb
* @tdb: the tdb context returned from tdb_open()
*
* This returns the flags on the current tdb. Some of these are caused by
* the flags argument to tdb_open(), others (such as TDB_CONVERT) are
* intuited.
*/
unsigned int tdb_get_flags(struct tdb_context *tdb);
/**
* tdb_add_flag - set a flag for a tdb
* @tdb: the tdb context returned from tdb_open()
* @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM or
*        TDB_ALLOW_NESTING.
*
* You can use this to set a flag on the TDB. You cannot set these flags
* on a TDB_INTERNAL tdb.
*/
void tdb_add_flag(struct tdb_context *tdb, unsigned flag);
/**
* tdb_remove_flag - unset a flag for a tdb
* @tdb: the tdb context returned from tdb_open()
* @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM or
*        TDB_ALLOW_NESTING.
*
* You can use this to clear a flag on the TDB. You cannot clear flags
* on a TDB_INTERNAL tdb.
*/
void tdb_remove_flag(struct tdb_context *tdb, unsigned flag);
/**
* enum tdb_attribute_type - discriminator for union tdb_attribute.
*/
enum tdb_attribute_type {
TDB_ATTRIBUTE_LOG = 0,
TDB_ATTRIBUTE_HASH = 1,
TDB_ATTRIBUTE_SEED = 2,
TDB_ATTRIBUTE_STATS = 3,
TDB_ATTRIBUTE_OPENHOOK = 4,
TDB_ATTRIBUTE_FLOCK = 5
};
/**
* tdb_get_attribute - get an attribute for an existing tdb
* @tdb: the tdb context returned from tdb_open()
* @attr: the union tdb_attribute to get.
*
* This gets an attribute from a TDB which has previously been set (or
* may return the default values). Set @attr.base.attr to the
* attribute type you want to get.
*
* Currently this does not work for TDB_ATTRIBUTE_OPENHOOK.
*/
enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
union tdb_attribute *attr);
/**
* tdb_set_attribute - set an attribute for an existing tdb
* @tdb: the tdb context returned from tdb_open()
* @attr: the union tdb_attribute to set.
*
* This sets an attribute on a TDB, overriding any previous attribute
* of the same type. It returns TDB_ERR_EINVAL if the attribute is
* unknown or invalid.
*
* Note that TDB_ATTRIBUTE_HASH, TDB_ATTRIBUTE_SEED and
* TDB_ATTRIBUTE_OPENHOOK cannot currently be set after tdb_open.
*/
enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
const union tdb_attribute *attr);
/**
* tdb_unset_attribute - reset an attribute for an existing tdb
* @tdb: the tdb context returned from tdb_open()
* @type: the attribute type to unset.
*
* This unsets an attribute on a TDB, returning it to the defaults
* (where applicable).
*
* Note that it only makes sense for TDB_ATTRIBUTE_LOG and TDB_ATTRIBUTE_FLOCK
* to be unset.
*/
void tdb_unset_attribute(struct tdb_context *tdb,
enum tdb_attribute_type type);
/**
* tdb_name - get the name of a tdb
* @tdb: the tdb context returned from tdb_open()
*
* This returns a copy of the name string, made at tdb_open() time. If that
* argument was NULL (possible for a TDB_INTERNAL db) this will return NULL.
*
* This is mostly useful for logging.
*/
const char *tdb_name(const struct tdb_context *tdb);
/**
* tdb_fd - get the file descriptor of a tdb
* @tdb: the tdb context returned from tdb_open()
*
* This returns the file descriptor for the underlying database file, or -1
* for TDB_INTERNAL.
*/
int tdb_fd(const struct tdb_context *tdb);
/**
* struct tdb_attribute_base - common fields for all tdb attributes.
*/
struct tdb_attribute_base {
enum tdb_attribute_type attr;
union tdb_attribute *next;
};
/**
* enum tdb_log_level - log levels for tdb_attribute_log
* @TDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors
* or internal consistency failures.
* @TDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters
* or writing to a read-only database.
* @TDB_LOG_WARNING: used for informational messages on issues which
* are unusual but handled by TDB internally, such
* as a failure to mmap or failure to open /dev/urandom.
*/
enum tdb_log_level {
TDB_LOG_ERROR,
TDB_LOG_USE_ERROR,
TDB_LOG_WARNING
};
/**
* struct tdb_attribute_log - log function attribute
*
* This attribute provides a hook for you to log errors.
*/
struct tdb_attribute_log {
struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
void (*fn)(struct tdb_context *tdb,
enum tdb_log_level level,
const char *message,
void *data);
void *data;
};
/**
* struct tdb_attribute_hash - hash function attribute
*
* This attribute allows you to provide an alternative hash function.
* This hash function will be handed keys from the database; it will also
* be handed the 8-byte TDB_HASH_MAGIC value for checking the header (the
* tdb_open() will fail if the hash value doesn't match the header).
*
* Note that if your hash function gives different results on
* different machine endians, your tdb will no longer work across
* different architectures!
*/
struct tdb_attribute_hash {
struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
uint64_t (*fn)(const void *key, size_t len, uint64_t seed,
void *data);
void *data;
};
/**
* struct tdb_attribute_seed - hash function seed attribute
*
* The hash function seed is normally taken from /dev/urandom (or equivalent)
* but can be set manually here. This is mainly for testing purposes.
*/
struct tdb_attribute_seed {
struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_SEED */
uint64_t seed;
};
/**
* struct tdb_attribute_stats - tdb operational statistics
*
* This attribute records statistics of various low-level TDB operations.
* This can be used to assist performance evaluation. This is only
* useful for tdb_get_attribute().
*
* New fields will be added at the end, hence the "size" argument which
* indicates how large your structure is: it must be filled in before
* calling tdb_get_attribute(), which will overwrite it with the size
* tdb knows about.
*/
struct tdb_attribute_stats {
struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_STATS */
size_t size; /* = sizeof(struct tdb_attribute_stats) */
uint64_t allocs;
uint64_t alloc_subhash;
uint64_t alloc_chain;
uint64_t alloc_bucket_exact;
uint64_t alloc_bucket_max;
uint64_t alloc_leftover;
uint64_t alloc_coalesce_tried;
uint64_t alloc_coalesce_iterate_clash;
uint64_t alloc_coalesce_lockfail;
uint64_t alloc_coalesce_race;
uint64_t alloc_coalesce_succeeded;
uint64_t alloc_coalesce_num_merged;
uint64_t compares;
uint64_t compare_wrong_bucket;
uint64_t compare_wrong_offsetbits;
uint64_t compare_wrong_keylen;
uint64_t compare_wrong_rechash;
uint64_t compare_wrong_keycmp;
uint64_t transactions;
uint64_t transaction_cancel;
uint64_t transaction_nest;
uint64_t transaction_expand_file;
uint64_t transaction_read_direct;
uint64_t transaction_read_direct_fail;
uint64_t transaction_write_direct;
uint64_t transaction_write_direct_fail;
uint64_t expands;
uint64_t frees;
uint64_t locks;
uint64_t lock_lowlevel;
uint64_t lock_nonblock;
uint64_t lock_nonblock_fail;
};
/**
* struct tdb_attribute_openhook - tdb special effects hook for open
*
* This attribute contains a function to call once we have the OPEN_LOCK
* for the tdb, but before we've examined its contents. If this succeeds,
* the tdb will be populated if it's then zero-length.
*
* This is a hack to allow support for TDB1-style TDB_CLEAR_IF_FIRST
* behaviour.
*/
struct tdb_attribute_openhook {
struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_OPENHOOK */
enum TDB_ERROR (*fn)(int fd, void *data);
void *data;
};
/**
* struct tdb_attribute_flock - tdb special effects hook for file locking
*
* This attribute contains functions to call to place locks on a file; it can
* be used to support non-blocking operations or lock proxying.
*
* They should return 0 on success, -1 on failure and set errno.
*
* A failure will be logged if errno is neither EAGAIN nor EINTR
* (normally it would only return EAGAIN if waitflag is false, and
* loop internally on EINTR).
*/
struct tdb_attribute_flock {
struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_FLOCK */
int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *);
int (*unlock)(int fd, int rw, off_t off, off_t len, void *);
void *data;
};
/**
* union tdb_attribute - tdb attributes.
*
* This represents all the known attributes.
*
* See also:
* struct tdb_attribute_log, struct tdb_attribute_hash,
* struct tdb_attribute_seed, struct tdb_attribute_stats,
* struct tdb_attribute_openhook, struct tdb_attribute_flock.
*/
union tdb_attribute {
struct tdb_attribute_base base;
struct tdb_attribute_log log;
struct tdb_attribute_hash hash;
struct tdb_attribute_seed seed;
struct tdb_attribute_stats stats;
struct tdb_attribute_openhook openhook;
struct tdb_attribute_flock flock;
};
#ifdef __cplusplus
}
#endif
#endif /* tdb2.h */

View File

@ -0,0 +1,250 @@
#include "external-agent.h"
#include "logging.h"
#include "lock-tracking.h"
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <err.h>
#include <fcntl.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <errno.h>
#include <ccan/tdb2/private.h>
#include <ccan/tap/tap.h>
#include <stdio.h>
#include <stdarg.h>
static struct tdb_context *tdb;
/*
 * Open hook emulating TDB1-style "clear if first" behaviour.
 *
 * Every opener keeps a lock on byte 63 of the file, so being able to take
 * that byte exclusively means nobody else has it: the file is then
 * truncated.  Either way we finish holding a read lock on the byte.
 * @arg is unused.
 */
static enum TDB_ERROR clear_if_first(int fd, void *arg)
{
	struct flock probe;

	probe.l_whence = SEEK_SET;
	probe.l_start = 63;
	probe.l_len = 1;

	/* Non-blocking attempt at the exclusive lock. */
	probe.l_type = F_WRLCK;
	if (fcntl(fd, F_SETLK, &probe) == 0) {
		/* We must be first ones to open it! */
		diag("agent truncating file!");
		if (ftruncate(fd, 0) != 0)
			return TDB_ERR_IO;
	}

	/* Now take (and wait for, if necessary) the shared lock. */
	probe.l_type = F_RDLCK;
	return fcntl(fd, F_SETLKW, &probe) != 0 ? TDB_ERR_IO : TDB_SUCCESS;
}
/*
 * Carry out one request inside the agent process.
 * @op: the operation to perform.
 * @name: the file name for OPEN/OPEN_WITH_HOOK; for FETCH and STORE it is
 *	  used as both the key and the expected/stored value.
 *
 * Every operation except the two opens requires a tdb to be open already.
 * If locking_would_block was set while the operation ran, the result is
 * replaced by WOULD_HAVE_BLOCKED.
 */
static enum agent_return do_operation(enum operation op, const char *name)
{
	TDB_DATA k;
	enum agent_return ret;
	TDB_DATA data;
	enum TDB_ERROR ecode;
	union tdb_attribute cif;

	if (op != OPEN && op != OPEN_WITH_HOOK && !tdb) {
		diag("external: No tdb open!");
		return OTHER_FAILURE;
	}

	diag("external: %s", operation_name(op));

	k = tdb_mkdata(name, strlen(name));

	locking_would_block = 0;
	switch (op) {
	case OPEN:
		if (tdb) {
			diag("Already have tdb %s open", tdb->name);
			return OTHER_FAILURE;
		}
		tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &tap_log_attr);
		if (!tdb) {
			if (!locking_would_block)
				diag("Opening tdb gave %s", strerror(errno));
			forget_locking();
			ret = OTHER_FAILURE;
		} else
			ret = SUCCESS;
		break;
	case OPEN_WITH_HOOK:
		if (tdb) {
			diag("Already have tdb %s open", tdb->name);
			return OTHER_FAILURE;
		}
		cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK;
		cif.openhook.base.next = &tap_log_attr;
		cif.openhook.fn = clear_if_first;
		/* clear_if_first() ignores its argument, but don't hand
		 * tdb_open() an indeterminate pointer. */
		cif.openhook.data = NULL;
		tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &cif);
		if (!tdb) {
			if (!locking_would_block)
				diag("Opening tdb gave %s", strerror(errno));
			forget_locking();
			ret = OTHER_FAILURE;
		} else
			ret = SUCCESS;
		break;
	case FETCH:
		ecode = tdb_fetch(tdb, k, &data);
		if (ecode == TDB_ERR_NOEXIST) {
			ret = FAILED;
		} else if (ecode < 0) {
			ret = OTHER_FAILURE;
		} else if (!tdb_deq(data, k)) {
			/* Found, but the value is not the key we stored. */
			ret = OTHER_FAILURE;
			free(data.dptr);
		} else {
			ret = SUCCESS;
			free(data.dptr);
		}
		break;
	case STORE:
		/* NOTE(review): 0 is none of TDB_REPLACE/TDB_INSERT/
		 * TDB_MODIFY; confirm tdb_store() treats it as a replace. */
		ret = tdb_store(tdb, k, k, 0) == 0 ? SUCCESS : OTHER_FAILURE;
		break;
	case TRANSACTION_START:
		ret = tdb_transaction_start(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
		break;
	case TRANSACTION_COMMIT:
		ret = tdb_transaction_commit(tdb)==0 ? SUCCESS : OTHER_FAILURE;
		break;
	case NEEDS_RECOVERY:
		ret = tdb_needs_recovery(tdb) ? SUCCESS : FAILED;
		break;
	case CHECK:
		ret = tdb_check(tdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE;
		break;
	case CLOSE:
		ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE;
		/* The handle is unusable after tdb_close(), whatever it
		 * returned. */
		tdb = NULL;
		break;
	case SEND_SIGNAL:
		/* We do this async */
		ret = SUCCESS;
		break;
	default:
		ret = OTHER_FAILURE;
	}

	if (locking_would_block)
		ret = WOULD_HAVE_BLOCKED;

	return ret;
}
/* Parent-side handle for the forked agent process. */
struct agent {
/* cmdfd: write end of the command pipe; responsefd: read end of the
 * response pipe (see prepare_external_agent). */
int cmdfd, responsefd;
};
/* Do this before doing any tdb stuff.  Return handle, or NULL.
 *
 * Forks a child which loops reading commands (one opcode byte followed by a
 * NUL-terminated name) from a pipe, runs them with do_operation() and writes
 * the enum agent_return result back on a second pipe.  The parent gets a
 * handle wrapping its ends of the two pipes.  On any failure every
 * descriptor opened here is closed again and NULL is returned. */
struct agent *prepare_external_agent(void)
{
	int pid, ret;
	int command[2], response[2];
	/* Opcode byte + name (up to PATH_MAX bytes including its NUL), plus
	 * one spare byte so whatever we read can always be terminated. */
	char name[1+PATH_MAX+1];

	if (pipe(command) != 0)
		return NULL;
	if (pipe(response) != 0) {
		close(command[0]);
		close(command[1]);
		return NULL;
	}

	pid = fork();
	if (pid < 0) {
		close(command[0]);
		close(command[1]);
		close(response[0]);
		close(response[1]);
		return NULL;
	}

	if (pid != 0) {
		/* Parent: keep the write end of command, read end of response. */
		struct agent *agent = malloc(sizeof(*agent));

		close(command[0]);
		close(response[1]);
		if (!agent) {
			/* Closing the command pipe gives the child EOF, so it
			 * exits on its own. */
			close(command[1]);
			close(response[0]);
			return NULL;
		}
		agent->cmdfd = command[1];
		agent->responsefd = response[0];
		return agent;
	}

	/* Child: the agent proper. */
	close(command[1]);
	close(response[0]);

	/* We want to fail, not block. */
	nonblocking_locks = true;
	log_prefix = "external: ";
	while ((ret = read(command[0], name, sizeof(name) - 1)) > 0) {
		enum agent_return result;

		/* Never hand an unterminated buffer to do_operation(). */
		name[ret] = '\0';
		result = do_operation(name[0], name+1);
		if (write(response[1], &result, sizeof(result))
		    != sizeof(result))
			err(1, "Writing response");

		if (name[0] == SEND_SIGNAL) {
			/* Reply first, then signal the parent ~10ms later. */
			struct timeval ten_ms;
			ten_ms.tv_sec = 0;
			ten_ms.tv_usec = 10000;
			select(0, NULL, NULL, NULL, &ten_ms);
			kill(getppid(), SIGUSR1);
		}
	}
	exit(0);
}
/* Ask the external agent to try to do an operation. */
enum agent_return external_agent_operation(struct agent *agent,
enum operation op,
const char *name)
{
enum agent_return res;
unsigned int len;
char *string;
if (!name)
name = "";
len = 1 + strlen(name) + 1;
string = malloc(len);
string[0] = op;
strcpy(string+1, name);
if (write(agent->cmdfd, string, len) != len
|| read(agent->responsefd, &res, sizeof(res)) != sizeof(res))
res = AGENT_DIED;
free(string);
return res;
}
/* Map an agent_return value to its enumerator name, or "**INVALID**" for
 * anything that is not a known enumerator. */
const char *agent_return_name(enum agent_return ret)
{
	switch (ret) {
	case SUCCESS:
		return "SUCCESS";
	case WOULD_HAVE_BLOCKED:
		return "WOULD_HAVE_BLOCKED";
	case AGENT_DIED:
		return "AGENT_DIED";
	case FAILED:
		return "FAILED";
	case OTHER_FAILURE:
		return "OTHER_FAILURE";
	default:
		return "**INVALID**";
	}
}
const char *operation_name(enum operation op)
{
switch (op) {
case OPEN: return "OPEN";
case OPEN_WITH_HOOK: return "OPEN_WITH_HOOK";
case FETCH: return "FETCH";
case STORE: return "STORE";
case CHECK: return "CHECK";
case TRANSACTION_START: return "TRANSACTION_START";
case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT";
case NEEDS_RECOVERY: return "NEEDS_RECOVERY";
case SEND_SIGNAL: return "SEND_SIGNAL";
case CLOSE: return "CLOSE";
}
return "**INVALID**";
}
/* Close both pipe ends held by the handle and free it.
 * A NULL handle (which prepare_external_agent() can return) is ignored,
 * mirroring free(NULL). */
void free_external_agent(struct agent *agent)
{
	if (!agent)
		return;
	close(agent->cmdfd);
	close(agent->responsefd);
	free(agent);
}

View File

@ -0,0 +1,43 @@
#ifndef TDB2_TEST_EXTERNAL_AGENT_H
#define TDB2_TEST_EXTERNAL_AGENT_H
/* For locking tests, we need a different process to try things at
* various times. */
/* Commands understood by the external agent. */
enum operation {
/* Open the named tdb. */
OPEN,
/* Open the named tdb with an open hook attached. */
OPEN_WITH_HOOK,
/* Fetch the record keyed by name; the data must equal the key. */
FETCH,
/* Store a record whose key and data are both name. */
STORE,
TRANSACTION_START,
TRANSACTION_COMMIT,
/* SUCCESS if the tdb needs recovery, FAILED if not. */
NEEDS_RECOVERY,
CHECK,
/* Agent replies SUCCESS, then sends SIGUSR1 to its parent shortly after. */
SEND_SIGNAL,
CLOSE,
};
/* Do this before doing any tdb stuff.  Return handle, or NULL. */
struct agent *prepare_external_agent(void);
/* Outcome of an operation requested from the external agent. */
enum agent_return {
SUCCESS,
/* A lock the agent made non-blocking could not be taken. */
WOULD_HAVE_BLOCKED,
/* The command could not be sent or the reply could not be read. */
AGENT_DIED,
FAILED, /* For fetch, or NEEDS_RECOVERY */
OTHER_FAILURE,
};
/* Ask the external agent to try to do an operation.
 * name == tdb name for OPEN/OPEN_WITH_HOOK,
* record name for FETCH/STORE (store stores name as data too)
*/
enum agent_return external_agent_operation(struct agent *handle,
enum operation op,
const char *name);
/* Mapping enum -> string. */
const char *agent_return_name(enum agent_return ret);
const char *operation_name(enum operation op);
void free_external_agent(struct agent *agent);
#endif /* TDB2_TEST_EXTERNAL_AGENT_H */

View File

@ -0,0 +1,117 @@
#include "failtest_helper.h"
#include "logging.h"
#include <string.h>
#include <ccan/tap/tap.h>
/* FIXME: From ccan/str */
/* True if str ends with postfix (an empty postfix always matches). */
static inline bool strends(const char *str, const char *postfix)
{
	const size_t slen = strlen(str);
	const size_t plen = strlen(postfix);

	/* Compare only the tail of str that is as long as postfix. */
	return slen >= plen && strcmp(str + (slen - plen), postfix) == 0;
}
/* Does this recorded call come from the given call site?
 * The type and line must match exactly; the file matches either exactly or
 * as the final path component of call->file (i.e. preceded by '/'). */
bool failmatch(const struct failtest_call *call,
	       const char *file, int line, enum failtest_call_type type)
{
	if (call->type != type || call->line != line)
		return false;

	if (strcmp(call->file, file) == 0)
		return true;

	if (!strends(call->file, file))
		return false;

	/* Not equal but a suffix: call->file is strictly longer, so the
	 * character before the suffix exists. */
	return call->file[strlen(call->file) - strlen(file) - 1] == '/';
}
/* Return the first entry in [start, end) made from the same call site
 * (file, line, type) as call, or NULL if there is none. */
static const struct failtest_call *
find_repeat(const struct failtest_call *start, const struct failtest_call *end,
	    const struct failtest_call *call)
{
	const struct failtest_call *cur = start;

	while (cur < end) {
		if (failmatch(cur, call->file, call->line, call->type))
			return cur;
		cur++;
	}
	return NULL;
}
/* True for an fcntl() call whose command is F_SETLK. */
static bool is_nonblocking_lock(const struct failtest_call *call)
{
	if (call->type != FAILTEST_FCNTL)
		return false;
	return call->u.fcntl.cmd == F_SETLK;
}
/* True for an fcntl() call whose flock argument has l_type F_UNLCK. */
static bool is_unlock(const struct failtest_call *call)
{
	if (call->type != FAILTEST_FCNTL)
		return false;
	return call->u.fcntl.arg.fl.l_type == F_UNLCK;
}
/* Exit-time check: if a failure was injected somewhere that is expected to
 * log, make sure something was logged.
 *
 * Only the first injected failure that is not on the exemption list is
 * considered: returns true if tap_log_messages is non-zero, otherwise
 * emits a diagnostic and returns false.  Returns true if there is no such
 * failure at all. */
bool exit_check_log(struct failtest_call *history, unsigned num)
{
	unsigned int idx;

	for (idx = 0; idx < num; idx++) {
		struct failtest_call *call = &history[idx];
		bool exempt;

		exempt = !call->fail
			/* Failing the /dev/urandom open doesn't count: we fall back. */
			|| failmatch(call, URANDOM_OPEN)
			/* Similarly with read fail. */
			|| failmatch(call, URANDOM_READ)
			/* Initial allocation of tdb doesn't log. */
			|| failmatch(call, INITIAL_TDB_MALLOC)
			/* We don't block "failures" on non-blocking locks. */
			|| is_nonblocking_lock(call);
		if (exempt)
			continue;

		if (tap_log_messages != 0)
			return true;

		diag("We didn't log for %u (%s:%u)",
		     idx, history[idx].file, history[idx].line);
		return false;
	}
	return true;
}
/* Has any earlier call made from the same call site as @last satisfied
 * @pred?  Each search resumes just past the previous hit so that every
 * earlier call from that site is examined, not only the first one. */
static bool repeated_before(const struct failtest_call *history,
			    const struct failtest_call *last,
			    bool (*pred)(const struct failtest_call *))
{
	const struct failtest_call *i;

	for (i = find_repeat(history, last, last);
	     i;
	     i = find_repeat(i + 1, last, last)) {
		if (pred(i))
			return true;
	}
	return false;
}

/* Some places we soldier on despite errors: only fail them once.
 *
 * Failtest hook: looks at the most recent call, history[num-1].
 *  - Call sites known to be survivable (initial tdb malloc, /dev/urandom
 *    open/read): FAIL_DONT_FAIL if the same site was already seen,
 *    otherwise FAIL_PROBE.
 *  - Unlocks and non-blocking locks: FAIL_DONT_FAIL if the same site was
 *    already seen doing the same kind of lock operation, otherwise
 *    FAIL_PROBE.
 *  - Anything else: FAIL_OK. */
enum failtest_result
block_repeat_failures(struct failtest_call *history, unsigned num)
{
	const struct failtest_call *last = &history[num-1];

	if (failmatch(last, INITIAL_TDB_MALLOC)
	    || failmatch(last, URANDOM_OPEN)
	    || failmatch(last, URANDOM_READ)) {
		if (find_repeat(history, last, last))
			return FAIL_DONT_FAIL;
		return FAIL_PROBE;
	}

	/* Unlock or non-blocking lock is fail-once. */
	if (is_unlock(last)) {
		/* Find a previous unlock at this point? */
		if (repeated_before(history, last, is_unlock))
			return FAIL_DONT_FAIL;
		return FAIL_PROBE;
	} else if (is_nonblocking_lock(last)) {
		/* Find a previous non-blocking lock at this point? */
		if (repeated_before(history, last, is_nonblocking_lock))
			return FAIL_DONT_FAIL;
		return FAIL_PROBE;
	}

	return FAIL_OK;
}

View File

@ -0,0 +1,17 @@
#ifndef TDB2_TEST_FAILTEST_HELPER_H
#define TDB2_TEST_FAILTEST_HELPER_H
#include <ccan/failtest/failtest.h>
#include <stdbool.h>
/* FIXME: Check these! */
#define INITIAL_TDB_MALLOC "open.c", 338, FAILTEST_MALLOC
#define URANDOM_OPEN "open.c", 45, FAILTEST_OPEN
#define URANDOM_READ "open.c", 25, FAILTEST_READ
bool exit_check_log(struct failtest_call *history, unsigned num);
bool failmatch(const struct failtest_call *call,
const char *file, int line, enum failtest_call_type type);
enum failtest_result
block_repeat_failures(struct failtest_call *history, unsigned num);
#endif /* TDB2_TEST_FAILTEST_HELPER_H */

348
lib/tdb2/test/layout.c Normal file
View File

@ -0,0 +1,348 @@
/* TDB tools to create various canned database layouts. */
#include "layout.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <err.h>
#include "logging.h"
/* Allocate an empty layout description.
 * filename is stored by pointer, not copied; tdb_layout_get() writes the
 * image to that file when it is non-NULL.  Exits via err() if memory
 * cannot be allocated. */
struct tdb_layout *new_tdb_layout(const char *filename)
{
	struct tdb_layout *layout = malloc(sizeof(*layout));

	if (!layout)
		err(1, "allocating tdb_layout");
	layout->filename = filename;
	layout->num_elems = 0;
	layout->elem = NULL;
	return layout;
}
/* Append elem to the layout's element array, growing it by one slot.
 * Exits via err() if the array cannot be grown; the old array pointer is
 * only replaced once realloc() has succeeded. */
static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
{
	union tdb_layout_elem *grown;

	grown = realloc(layout->elem,
			sizeof(layout->elem[0]) * (layout->num_elems + 1));
	if (!grown)
		err(1, "growing layout to %u elements",
		    layout->num_elems + 1);
	layout->elem = grown;
	layout->elem[layout->num_elems++] = elem;
}
void tdb_layout_add_freetable(struct tdb_layout *layout)
{
union tdb_layout_elem elem;
elem.base.type = FREETABLE;
add(layout, elem);
}
/* Append a free record of len bytes, to be listed in free table number
 * ftable (counting free-table elements from 0). */
void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
			 unsigned ftable)
{
	union tdb_layout_elem free_elem;

	free_elem.base.type = FREE;
	free_elem.free.ftable_num = ftable;
	free_elem.free.len = len;
	add(layout, free_elem);
}
/* Return a heap copy of key (same dsize, freshly allocated dptr).
 * The caller owns ret.dptr and releases it with free().  Exits via err()
 * if memory cannot be allocated.  A zero-length key still gets a valid
 * (one byte) allocation so dptr is never NULL. */
static struct tdb_data dup_key(struct tdb_data key)
{
	struct tdb_data ret;

	ret.dsize = key.dsize;
	/* malloc(0) may legitimately return NULL; ask for at least a byte. */
	ret.dptr = malloc(ret.dsize ? ret.dsize : 1);
	if (!ret.dptr)
		err(1, "duplicating %zu byte key", (size_t)ret.dsize);
	/* memcpy() with a NULL source is undefined even for zero bytes. */
	if (ret.dsize)
		memcpy(ret.dptr, key.dptr, ret.dsize);
	return ret;
}
/* Append a used (data) record holding private copies of key and data,
 * followed by extra bytes of padding. */
void tdb_layout_add_used(struct tdb_layout *layout,
			 TDB_DATA key, TDB_DATA data,
			 tdb_len_t extra)
{
	union tdb_layout_elem used_elem;

	used_elem.base.type = DATA;
	/* Copies are released again by tdb_layout_free(). */
	used_elem.used.key = dup_key(key);
	used_elem.used.data = dup_key(data);
	used_elem.used.extra = extra;
	add(layout, used_elem);
}
/* Space taken in the image by a free record with len bytes of payload:
 * the payload plus one used-record header. */
static tdb_len_t free_record_len(tdb_len_t len)
{
	const tdb_len_t header = sizeof(struct tdb_used_record);

	return header + len;
}
/* Space taken in the image by a used record: header, key, data and
 * padding.  Asserts the result is at least the size of a free record. */
static tdb_len_t data_record_len(struct tle_used *used)
{
	tdb_len_t total = sizeof(struct tdb_used_record);

	total += used->key.dsize + used->data.dsize + used->extra;
	assert(total >= sizeof(struct tdb_free_record));
	return total;
}
/* Space taken in the image by a sub-level hash table: header, the bucket
 * array (1 << TDB_SUBLEVEL_HASH_BITS offsets) and padding. */
static tdb_len_t hashtable_len(struct tle_hashtable *htable)
{
	const tdb_len_t buckets = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;

	return sizeof(struct tdb_used_record) + buckets + htable->extra;
}
/* Space taken in the image by a free table; the same for every table, so
 * the element itself is not consulted. */
static tdb_len_t freetable_len(struct tle_freetable *ftable)
{
	(void)ftable;
	return sizeof(struct tdb_freetable);
}
/* Placeholder for writing a free record into the image: intentionally does
 * nothing with mem or len. */
static void set_free_record(void *mem, tdb_len_t len)
{
/* We do all the work in add_to_freetable */
}
/* If the record has any padding, zero the first padding byte, i.e. the
 * byte len bytes past the end of the header u. */
static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
{
	char *payload = (char *)(u + 1);

	if (extra != 0)
		payload[len] = '\0';
}
/* Write a used record at mem: header, then the key bytes, then the data
 * bytes, then (if there is padding) a single zero byte. */
static void set_data_record(void *mem, struct tdb_context *tdb,
			    struct tle_used *used)
{
	struct tdb_used_record *rec = mem;
	char *payload = (char *)(rec + 1);
	const size_t klen = used->key.dsize;
	const size_t dlen = used->data.dsize;

	set_header(tdb, rec, TDB_USED_MAGIC, klen, dlen,
		   klen + dlen + used->extra,
		   tdb_hash(tdb, used->key.dptr, klen));
	memcpy(payload, used->key.dptr, klen);
	memcpy(payload + klen, used->data.dptr, dlen);
	add_zero_pad(rec, klen + dlen, used->extra);
}
/* Write an empty sub-level hash table at mem: header followed by an
 * all-zero bucket array, then (if there is padding) a single zero byte. */
static void set_hashtable(void *mem, struct tdb_context *tdb,
			  struct tle_hashtable *htable)
{
	struct tdb_used_record *rec = mem;
	const tdb_len_t table_bytes = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;

	set_header(tdb, rec, TDB_HTABLE_MAGIC, 0, table_bytes,
		   table_bytes + htable->extra, 0);
	memset(rec + 1, 0, table_bytes);
	add_zero_pad(rec, table_bytes, htable->extra);
}
/* Write a zeroed free table at mem and link it into the chain: after the
 * previous free table (found at offset last_ftable from hdr) when there is
 * one, otherwise as the header's first free table. */
static void set_freetable(void *mem, struct tdb_context *tdb,
			  struct tle_freetable *freetable, struct tdb_header *hdr,
			  tdb_off_t last_ftable)
{
	struct tdb_freetable *table = mem;
	const size_t body = sizeof(*table) - sizeof(table->hdr);

	memset(table, 0, sizeof(*table));
	set_header(tdb, &table->hdr, TDB_FTABLE_MAGIC, 0, body, body, 0);

	if (!last_ftable) {
		hdr->free_table = freetable->base.off;
	} else {
		struct tdb_freetable *prev;

		prev = (struct tdb_freetable *)((char *)hdr + last_ftable);
		prev->next = freetable->base.off;
	}
}
/* Register the free record at eoff (payload length elen) with the given
 * free table, by pointing the tdb at that table and calling
 * add_free_record(). */
static void add_to_freetable(struct tdb_context *tdb,
			     tdb_off_t eoff,
			     tdb_off_t elen,
			     unsigned ftable,
			     struct tle_freetable *freetable)
{
	/* The length handed over includes the record header. */
	const tdb_off_t with_header = sizeof(struct tdb_used_record) + elen;

	tdb->ftable = ftable;
	tdb->ftable_off = freetable->base.off;
	add_free_record(tdb, eoff, with_header, TDB_LOCK_WAIT, false);
}
/* File offset of bucket number ingroup (taken modulo the group size)
 * within the hash group starting at group_start. */
static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
{
	const unsigned slot = ingroup % (1 << TDB_HASH_GROUP_BITS);

	return group_start + slot * sizeof(tdb_off_t);
}
/* Get bits from a value: returns the num bits of val starting at bit
 * position start (0 = least significant).  num may be anything from 0 to
 * 32 inclusive. */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
	assert(num <= 32);
	/* Build the mask in 64 bits: shifting a 32-bit 1 by 32 is undefined. */
	return (val >> start) & (((uint64_t)1 << num) - 1);
}
/* We take bits from the top: that way we can lock whole sections of the hash
 * by using lock ranges.
 *
 * Returns the next num bits of h below the *used bits already consumed
 * from the top, and advances *used by num. */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
	const unsigned consumed = *used + num;

	*used = consumed;
	return bits(h, 64 - consumed, num);
}
/* Build a hash bucket entry: the record offset OR-ed with the bucket
 * number and with the top TDB_OFF_UPPER_STEAL_EXTRA bits of the hash,
 * the latter shifted up to TDB_OFF_HASH_EXTRA_BIT. */
static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
			       uint64_t h)
{
	const uint64_t hash_top = bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
				       TDB_OFF_UPPER_STEAL_EXTRA);

	return bucket | new_off | (hash_top << TDB_OFF_HASH_EXTRA_BIT);
}
/* FIXME: Our hash table handling here is primitive: we don't expand! */
/* Enter the record at eoff into the top-level hash table under key.
 * The key's hash selects a group and a home bucket; the first empty bucket
 * at or (wrapping) after the home bucket within that group is used.
 * Aborts if the whole group is full. */
static void add_to_hashtable(struct tdb_context *tdb,
			     tdb_off_t eoff,
			     struct tdb_data key)
{
	const uint64_t hash = tdb_hash(tdb, key.dptr, key.dsize);
	const unsigned group_size = 1 << TDB_HASH_GROUP_BITS;
	unsigned consumed = 0;
	unsigned group, home, step;
	tdb_off_t group_start;

	/* Group first, then bucket: use_bits() consumes from the top down. */
	group = use_bits(hash, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
			 &consumed);
	home = use_bits(hash, TDB_HASH_GROUP_BITS, &consumed);

	group_start = offsetof(struct tdb_header, hashtable)
		+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);

	for (step = 0; step < group_size; step++) {
		const unsigned bucket = (home + step) % group_size;
		const tdb_off_t slot = hbucket_off(group_start, bucket);

		if (tdb_read_off(tdb, slot) != 0)
			continue;

		tdb_write_off(tdb, slot, encode_offset(eoff, bucket, hash));
		return;
	}
	abort();
}
static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
{
unsigned i;
for (i = 0; i < layout->num_elems; i++) {
if (layout->elem[i].base.type != FREETABLE)
continue;
if (num == 0)
return &layout->elem[i].ftable;
num--;
}
abort();
}
/* FIXME: Support TDB_CONVERT */
/* Build a tdb whose contents follow the layout description.
 *
 * An in-memory image is assembled in three passes (assign offsets, write
 * each element, then populate the free and hash tables) and installed as
 * the map of a TDB_INTERNAL tdb.  If layout->filename is set, the image is
 * written to that file, the internal tdb is closed and the file is
 * reopened with TDB_NOMMAP; that handle is returned instead.
 *
 * The layout must contain at least one free table (asserted). */
struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
{
unsigned int i;
tdb_off_t off, len, last_ftable;
char *mem;
struct tdb_context *tdb;
/* Elements are laid out back to back, directly after the header. */
off = sizeof(struct tdb_header);
/* First pass of layout: calc lengths */
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
e->base.off = off;
switch (e->base.type) {
case FREETABLE:
len = freetable_len(&e->ftable);
break;
case FREE:
len = free_record_len(e->free.len);
break;
case DATA:
len = data_record_len(&e->used);
break;
case HASHTABLE:
len = hashtable_len(&e->hashtable);
break;
default:
abort();
}
off += len;
}
/* NOTE(review): the malloc() result is used without a NULL check. */
mem = malloc(off);
/* Fill with some weird pattern. */
memset(mem, 0x99, off);
/* Now populate our header, cribbing from a real TDB header. */
/* NOTE(review): the tdb_open() result is dereferenced without a NULL check. */
tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
/* Mug the tdb we have to make it use this. */
free(tdb->file->map_ptr);
tdb->file->map_ptr = mem;
tdb->file->map_size = off;
/* Second pass: write each element; free tables are chained in order. */
last_ftable = 0;
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
switch (e->base.type) {
case FREETABLE:
set_freetable(mem + e->base.off, tdb, &e->ftable,
(struct tdb_header *)mem, last_ftable);
last_ftable = e->base.off;
break;
case FREE:
set_free_record(mem + e->base.off, e->free.len);
break;
case DATA:
set_data_record(mem + e->base.off, tdb, &e->used);
break;
case HASHTABLE:
set_hashtable(mem + e->base.off, tdb, &e->hashtable);
break;
}
}
/* Must have a free table! */
assert(last_ftable);
/* Now fill the free and hash tables. */
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
switch (e->base.type) {
case FREE:
add_to_freetable(tdb, e->base.off, e->free.len,
e->free.ftable_num,
find_ftable(layout, e->free.ftable_num));
break;
case DATA:
add_to_hashtable(tdb, e->base.off, e->used.key);
break;
default:
break;
}
}
/* add_to_freetable() moved ftable_off around; point it at the first table. */
tdb->ftable_off = find_ftable(layout, 0)->base.off;
/* Get physical if they asked for it. */
if (layout->filename) {
int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT,
0600);
if (fd < 0)
err(1, "opening %s for writing", layout->filename);
if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
!= tdb->file->map_size)
err(1, "writing %s", layout->filename);
close(fd);
tdb_close(tdb);
/* NOMMAP is for lockcheck. */
tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0,
&tap_log_attr);
}
return tdb;
}
void tdb_layout_free(struct tdb_layout *layout)
{
unsigned int i;
for (i = 0; i < layout->num_elems; i++) {
if (layout->elem[i].base.type == DATA) {
free(layout->elem[i].used.key.dptr);
free(layout->elem[i].used.data.dptr);
}
}
free(layout->elem);
free(layout);
}

68
lib/tdb2/test/layout.h Normal file
View File

@ -0,0 +1,68 @@
#ifndef TDB2_TEST_LAYOUT_H
#define TDB2_TEST_LAYOUT_H
#include <ccan/tdb2/private.h>
struct tdb_layout *new_tdb_layout(const char *filename);
void tdb_layout_add_freetable(struct tdb_layout *layout);
void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
unsigned ftable);
void tdb_layout_add_used(struct tdb_layout *layout,
TDB_DATA key, TDB_DATA data,
tdb_len_t extra);
#if 0 /* FIXME: Allow allocation of subtables */
void tdb_layout_add_hashtable(struct tdb_layout *layout,
int htable_parent, /* -1 == toplevel */
unsigned int bucket,
tdb_len_t extra);
#endif
struct tdb_context *tdb_layout_get(struct tdb_layout *layout);
void tdb_layout_free(struct tdb_layout *layout);
/* Kinds of element a layout can contain. */
enum layout_type {
FREETABLE, FREE, DATA, HASHTABLE,
};
/* Shared by all union members. */
struct tle_base {
enum layout_type type;
/* Offset of the element in the image; assigned by tdb_layout_get(). */
tdb_off_t off;
};
/* A free table; carries no data beyond the common base. */
struct tle_freetable {
struct tle_base base;
};
/* A free record. */
struct tle_free {
struct tle_base base;
/* Payload length, not counting the record header. */
tdb_len_t len;
/* Which free table (0-based, in layout order) lists this record. */
unsigned ftable_num;
};
/* A used record: key, data and trailing padding. */
struct tle_used {
struct tle_base base;
/* Heap copies owned by the layout; freed by tdb_layout_free(). */
TDB_DATA key;
TDB_DATA data;
/* Bytes of padding after the data. */
tdb_len_t extra;
};
/* A sub-level hash table. */
struct tle_hashtable {
struct tle_base base;
int parent;
unsigned int bucket;
/* Bytes of padding after the bucket array. */
tdb_len_t extra;
};
/* One layout element; base.type says which member is valid. */
union tdb_layout_elem {
struct tle_base base;
struct tle_freetable ftable;
struct tle_free free;
struct tle_used used;
struct tle_hashtable hashtable;
};
/* A canned database description: an ordered list of elements. */
struct tdb_layout {
/* File to write the image to, or NULL to keep it in memory only. */
const char *filename;
unsigned int num_elems;
union tdb_layout_elem *elem;
};
#endif /* TDB2_TEST_LAYOUT_H */

View File

@ -0,0 +1,147 @@
/* We save the locks so we can reacquire them. */
#include <unistd.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdlib.h>
#include <ccan/tap/tap.h>
#include <ccan/tdb2/private.h>
#include "lock-tracking.h"
/* One fcntl lock we believe we currently hold (singly-linked list node). */
struct lock {
struct lock *next;
/* Start offset and length as passed to fcntl (l_start / l_len). */
unsigned int off;
unsigned int len;
/* l_type the lock was taken with. */
int type;
};
/* Head of the list of locks currently recorded as held. */
static struct lock *locks;
/* Count of inconsistencies detected (unknown unlocks, overlapping locks). */
int locking_errors = 0;
/* When true, inconsistencies are neither reported nor counted. */
bool suppress_lockcheck = false;
/* When true, F_SETLKW requests are issued as F_SETLK instead. */
bool nonblocking_locks;
/* Count of locks that failed only after being made non-blocking. */
int locking_would_block = 0;
/* If set, called with the fd after each successful unlock. */
void (*unlock_callback)(int fd);
/* Replacement for fcntl() that tracks byte-range locks.
 *
 * Commands other than F_SETLK/F_SETLKW are passed straight through.  For
 * lock commands the real fcntl() is issued (as F_SETLK if nonblocking_locks
 * is set) and the private list of held locks is updated; unlocks of ranges
 * we do not hold and locks overlapping ranges we do hold are reported with
 * diag() and counted in locking_errors unless suppress_lockcheck is set.
 *
 * Returns whatever the underlying fcntl() returned. */
int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ )
{
va_list ap;
int ret, arg3;
struct flock *fl;
bool may_block = false;
if (cmd != F_SETLK && cmd != F_SETLKW) {
/* This may be totally bogus, but we don't know in general. */
va_start(ap, cmd);
arg3 = va_arg(ap, int);
va_end(ap);
return fcntl(fd, cmd, arg3);
}
va_start(ap, cmd);
fl = va_arg(ap, struct flock *);
va_end(ap);
/* Turn a blocking request into a non-blocking one, remembering we did. */
if (cmd == F_SETLKW && nonblocking_locks) {
cmd = F_SETLK;
may_block = true;
}
ret = fcntl(fd, cmd, fl);
/* Detect when we failed, but might have been OK if we waited. */
if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) {
locking_would_block++;
}
if (fl->l_type == F_UNLCK) {
struct lock **l;
struct lock *old = NULL;
/* Look for a recorded lock with exactly this start and length. */
for (l = &locks; *l; l = &(*l)->next) {
if ((*l)->off == fl->l_start
&& (*l)->len == fl->l_len) {
/* Only forget the lock if the kernel really released it. */
if (ret == 0) {
old = *l;
*l = (*l)->next;
free(old);
}
break;
}
}
/* NOTE(review): old has been freed by now; only whether it is NULL is
 * used.  A failed unlock of a known range is reported here as well. */
if (!old && !suppress_lockcheck) {
diag("Unknown unlock %u@%u - %i",
(int)fl->l_len, (int)fl->l_start, ret);
locking_errors++;
}
} else {
struct lock *new, *i;
unsigned int fl_end = fl->l_start + fl->l_len;
/* Length 0 means "to end of file". */
if (fl->l_len == 0)
fl_end = (unsigned int)-1;
/* Check for overlaps: we shouldn't do this. */
for (i = locks; i; i = i->next) {
unsigned int i_end = i->off + i->len;
if (i->len == 0)
i_end = (unsigned int)-1;
if (fl->l_start >= i->off && fl->l_start < i_end)
break;
if (fl_end > i->off && fl_end < i_end)
break;
/* tdb_allrecord_lock does this, handle adjacent: */
if (fl->l_start > TDB_HASH_LOCK_START
&& fl->l_start == i_end && fl->l_type == i->type) {
/* Extend the existing record instead of adding a new one. */
if (ret == 0) {
i->len = fl->l_len
? i->len + fl->l_len
: 0;
}
goto done;
}
}
/* i is non-NULL here only if the loop stopped on an overlap. */
if (i) {
/* Special case: upgrade of allrecord lock. */
if (i->type == F_RDLCK && fl->l_type == F_WRLCK
&& i->off == TDB_HASH_LOCK_START
&& fl->l_start == TDB_HASH_LOCK_START
&& i->len == 0
&& fl->l_len == 0) {
if (ret == 0)
i->type = F_WRLCK;
goto done;
}
if (!suppress_lockcheck) {
diag("%s lock %u@%u overlaps %u@%u",
fl->l_type == F_WRLCK ? "write" : "read",
(int)fl->l_len, (int)fl->l_start,
i->len, (int)i->off);
locking_errors++;
}
}
/* Record the new lock (even an overlapping one) if it was granted. */
if (ret == 0) {
/* NOTE(review): the malloc() result is used without a NULL check. */
new = malloc(sizeof *new);
new->off = fl->l_start;
new->len = fl->l_len;
new->type = fl->l_type;
new->next = locks;
locks = new;
}
}
done:
if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback)
unlock_callback(fd);
return ret;
}
unsigned int forget_locking(void)
{
unsigned int num = 0;
while (locks) {
struct lock *next = locks->next;
free(locks);
locks = next;
num++;
}
return num;
}

View File

@ -0,0 +1,25 @@
#ifndef LOCK_TRACKING_H
#define LOCK_TRACKING_H
#include <stdbool.h>
/* Set this if you want a callback after fcntl unlock. */
extern void (*unlock_callback)(int fd);
/* Replacement fcntl. */
int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ );
/* Discard locking info: returns number of locks outstanding. */
unsigned int forget_locking(void);
/* Number of errors in locking. */
extern int locking_errors;
/* Suppress lock checking. */
extern bool suppress_lockcheck;
/* Make all locks non-blocking. */
extern bool nonblocking_locks;
/* Number of times we failed a lock because we made it non-blocking. */
extern int locking_would_block;
#endif /* LOCK_TRACKING_H */

24
lib/tdb2/test/logging.c Normal file
View File

@ -0,0 +1,24 @@
#include <stdio.h>
#include <stdlib.h>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Number of messages tap_log_fn() has reported. */
unsigned tap_log_messages;
/* Inserted before every message tap_log_fn() prints. */
const char *log_prefix = "";
/* When true, tap_log_fn() neither prints nor counts. */
bool suppress_logging;
/* Ready-made log attribute routing tdb log messages to tap_log_fn(). */
union tdb_attribute tap_log_attr = {
.log = { .base = { .attr = TDB_ATTRIBUTE_LOG },
.fn = tap_log_fn }
};
/* tdb log callback: print the message as a TAP diagnostic (prefixed with
 * log_prefix) and count it, unless logging is suppressed. */
void tap_log_fn(struct tdb_context *tdb,
		enum tdb_log_level level,
		const char *message, void *priv)
{
	if (!suppress_logging) {
		diag("tdb log level %u: %s%s", level, log_prefix, message);
		tap_log_messages++;
	}
}

15
lib/tdb2/test/logging.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef TDB2_TEST_LOGGING_H
#define TDB2_TEST_LOGGING_H
#include <ccan/tdb2/tdb2.h>
#include <stdbool.h>
#include <string.h>
extern bool suppress_logging;
extern const char *log_prefix;
extern unsigned tap_log_messages;
extern union tdb_attribute tap_log_attr;
void tap_log_fn(struct tdb_context *tdb,
enum tdb_log_level level,
const char *message, void *priv);
#endif /* TDB2_TEST_LOGGING_H */

View File

@ -0,0 +1,48 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Test that set_header() accepts a range of key/data/padding lengths and
 * that the rec_*() accessors read back what was stored, with nothing
 * logged along the way. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_used_record rec;
/* A bare context is enough: only the log function is filled in. */
struct tdb_context tdb = { .log_fn = tap_log_fn };
plan_tests(64 + 32 + 48*6 + 1);
/* We should be able to encode any data value. */
for (i = 0; i < 64; i++)
ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, 0, 1ULL << i,
1ULL << i, 0) == 0);
/* And any key and data with < 64 bits between them. */
for (i = 0; i < 32; i++) {
/* NOTE(review): 1ULL >> (63 - i) is 0 for every i in this loop, so dlen
 * is always 0 here; confirm whether a different shift was intended. */
tdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, klen, dlen,
klen + dlen, 0) == 0);
}
/* We should neatly encode all values. */
for (i = 0; i < 48; i++) {
/* Hash, key length and padding are capped at bits 4, 15 and 31. */
uint64_t h = 1ULL << (i < 5 ? i : 4);
uint64_t klen = 1ULL << (i < 16 ? i : 15);
uint64_t dlen = 1ULL << i;
uint64_t xlen = 1ULL << (i < 32 ? i : 31);
ok1(set_header(&tdb, &rec, TDB_USED_MAGIC, klen, dlen,
klen+dlen+xlen, h) == 0);
ok1(rec_key_length(&rec) == klen);
ok1(rec_data_length(&rec) == dlen);
ok1(rec_extra_padding(&rec) == xlen);
ok1((uint64_t)rec_hash(&rec) == h);
ok1(rec_magic(&rec) == TDB_USED_MAGIC);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,40 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
/* Reference "find last set": 1-based position of the highest set bit of
 * num, or 0 when num is 0. */
static unsigned int dumb_fls(uint64_t num)
{
	unsigned int width = 0;

	/* Count how many right shifts it takes to empty the value. */
	while (num != 0) {
		width++;
		num >>= 1;
	}
	return width;
}
/* Compare fls64() against the reference dumb_fls() for zero and for every
 * value with one or two bits set. */
int main(int argc, char *argv[])
{
	unsigned int i, j;

	plan_tests(64 * 64 + 2);

	ok1(fls64(0) == 0);
	ok1(dumb_fls(0) == 0);

	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++) {
			uint64_t val = (1ULL << i) | (1ULL << j);
			/* %llu needs an unsigned long long argument. */
			ok(fls64(val) == dumb_fls(val),
			   "%llu -> %u should be %u", (unsigned long long)val,
			   fls64(val), dumb_fls(val));
		}
	}
	return exit_status();
}

View File

@ -0,0 +1,42 @@
#include <ccan/failtest/failtest_override.h>
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include <ccan/failtest/failtest.h>
#include "logging.h"
#include "failtest_helper.h"
/* Under failtest, create a fresh database with each flag combination and
 * check that it opens, passes tdb_check() and logs nothing. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
failtest_init(argc, argv);
failtest_hook = block_repeat_failures;
failtest_exit_check = exit_check_log;
/* Three checks per flag combination. */
plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-new_database.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
if (!ok1(tdb))
failtest_exit(exit_status());
if (tdb) {
bool ok = ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Close before bailing out so the handle is not left open. */
tdb_close(tdb);
if (!ok)
failtest_exit(exit_status());
}
if (!ok1(tap_log_messages == 0))
break;
}
failtest_exit(exit_status());
}

View File

@ -0,0 +1,80 @@
#include <ccan/failtest/failtest_override.h>
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tap/tap.h>
#include <ccan/failtest/failtest.h>
#include "logging.h"
#include "failtest_helper.h"
/* While set, the failtest hook below vetoes every injected failure. */
static bool failtest_suppress = false;

/* Don't need to test everything here, just want expand testing. */
/* Failtest hook: never fail while failtest_suppress is set, otherwise
 * defer to block_repeat_failures(). */
static enum failtest_result
suppress_failure(struct failtest_call *history, unsigned num)
{
	return failtest_suppress ? FAIL_DONT_FAIL
				 : block_repeat_failures(history, num);
}
/* Under failtest, exercise tdb_expand() for each flag combination.
 * Failure injection is enabled only around the tdb_expand() calls
 * themselves; everything else runs with failtest_suppress set. */
int main(int argc, char *argv[])
{
unsigned int i;
uint64_t val;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
failtest_init(argc, argv);
failtest_hook = suppress_failure;
failtest_exit_check = exit_check_log;
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
failtest_suppress = true;
tdb = tdb_open("run-expand.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
if (!ok1(tdb))
break;
/* Remember the size before expanding. */
val = tdb->file->map_size;
/* Need some hash lock for expand. */
ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
failtest_suppress = false;
if (!ok1(tdb_expand(tdb, 1) == 0)) {
failtest_suppress = true;
tdb_close(tdb);
break;
}
failtest_suppress = true;
ok1(tdb->file->map_size >= val + 1 * TDB_EXTENSION_FACTOR);
ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Same again with a larger expansion. */
val = tdb->file->map_size;
ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
failtest_suppress = false;
if (!ok1(tdb_expand(tdb, 1024) == 0)) {
failtest_suppress = true;
tdb_close(tdb);
break;
}
failtest_suppress = true;
ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
ok1(tdb->file->map_size >= val + 1024 * TDB_EXTENSION_FACTOR);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
failtest_exit(exit_status());
}

View File

@ -0,0 +1,170 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#include "layout.h"
/* Read the free record at off and return its length.
 * If the read fails, the error code is returned (converted to tdb_len_t);
 * if the record does not carry the free magic, TDB_ERR_CORRUPT is. */
static tdb_len_t free_record_length(struct tdb_context *tdb, tdb_off_t off)
{
	struct tdb_free_record frec;
	enum TDB_ERROR status = tdb_read_convert(tdb, off, &frec, sizeof(frec));

	if (status != TDB_SUCCESS)
		return status;

	if (frec_magic(&frec) == TDB_FREE_MAGIC)
		return frec_len(&frec);

	return TDB_ERR_CORRUPT;
}
/* Test coalesce() against five canned layouts: a lone free record, a free
 * record followed by a used record, and two or three adjacent free records
 * followed by either end-of-file or a used record.  In every layout
 * elem[0] is the free table and elem[1] the first free record. */
int main(int argc, char *argv[])
{
tdb_off_t b_off, test;
struct tdb_context *tdb;
struct tdb_layout *layout;
struct tdb_data data, key;
tdb_len_t len;
/* FIXME: Test TDB_CONVERT */
/* FIXME: Test lock order fail. */
plan_tests(42);
data = tdb_mkdata("world", 5);
key = tdb_mkdata("hello", 5);
/* No coalescing can be done due to EOF */
layout = new_tdb_layout("run-03-coalesce.tdb");
tdb_layout_add_freetable(layout);
len = 1024;
tdb_layout_add_free(layout, len, 0);
tdb = tdb_layout_get(layout);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
/* Figure out which bucket free entry is. */
b_off = bucket_off(tdb->ftable_off, size_to_bucket(len));
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[1].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len, &test)
== 0);
tdb_unlock_free_bucket(tdb, b_off);
/* Record and the offset we passed in must both be unchanged. */
ok1(free_record_length(tdb, layout->elem[1].base.off) == len);
ok1(test == layout->elem[1].base.off);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
tdb_layout_free(layout);
/* No coalescing can be done due to used record */
layout = new_tdb_layout("run-03-coalesce.tdb");
tdb_layout_add_freetable(layout);
tdb_layout_add_free(layout, 1024, 0);
tdb_layout_add_used(layout, key, data, 6);
tdb = tdb_layout_get(layout);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
/* Lock and fail to coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[1].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 0);
tdb_unlock_free_bucket(tdb, b_off);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(test == layout->elem[1].base.off);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
tdb_layout_free(layout);
/* Coalescing can be done due to two free records, then EOF */
layout = new_tdb_layout("run-03-coalesce.tdb");
tdb_layout_add_freetable(layout);
tdb_layout_add_free(layout, 1024, 0);
tdb_layout_add_free(layout, 2048, 0);
tdb = tdb_layout_get(layout);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 2048);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket (first) free entry is. */
b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
/* Pass the offset of the record that is about to be swallowed. */
test = layout->elem[2].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct tdb_used_record) + 2048);
/* Should tell us it's erased this one... */
ok1(test == TDB_ERR_NOEXIST);
/* No explicit unlock here: the test expects no locks to remain held. */
ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 2048);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
tdb_layout_free(layout);
/* Coalescing can be done due to two free records, then data */
layout = new_tdb_layout("run-03-coalesce.tdb");
tdb_layout_add_freetable(layout);
tdb_layout_add_free(layout, 1024, 0);
tdb_layout_add_free(layout, 512, 0);
tdb_layout_add_used(layout, key, data, 6);
tdb = tdb_layout_get(layout);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct tdb_used_record) + 512);
ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0);
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 512);
ok1(test == TDB_ERR_NOEXIST);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
tdb_layout_free(layout);
/* Coalescing can be done due to three free records, then EOF */
layout = new_tdb_layout("run-03-coalesce.tdb");
tdb_layout_add_freetable(layout);
tdb_layout_add_free(layout, 1024, 0);
tdb_layout_add_free(layout, 512, 0);
tdb_layout_add_free(layout, 256, 0);
tdb = tdb_layout_get(layout);
ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(tdb, layout->elem[2].base.off) == 512);
ok1(free_record_length(tdb, layout->elem[3].base.off) == 256);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024));
/* Lock and coalesce. */
ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
/* All three records merge: two headers are absorbed into the payload. */
ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct tdb_used_record) + 512
+ sizeof(struct tdb_used_record) + 256);
ok1(tdb->file->allrecord_lock.count == 0
&& tdb->file->num_lockrecs == 0);
ok1(free_record_length(tdb, layout->elem[1].base.off)
== 1024 + sizeof(struct tdb_used_record) + 512
+ sizeof(struct tdb_used_record) + 256);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
tdb_layout_free(layout);
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,267 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Rigged hash function: the unsigned int at the start of the key is shifted
 * up so that its lowest bit lands just below the top TDB_TOPLEVEL_HASH_BITS
 * bits of the result.  Adjacent-numbered records therefore always clash.
 * len, seed and priv are ignored.
 */
static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
{
	const unsigned int *num = key;
	const int shift = 64 - TDB_TOPLEVEL_HASH_BITS - 1;

	return (uint64_t)*num << shift;
}
/*
 * Drive the low-level hash helpers (find_and_lock, add_to_hash,
 * expand_group, delete_from_hash) directly, once per entry in flags[].
 * The database is opened with the rigged clash() hash; the checks below
 * expect key 1 to share home bucket 0 of the first group with key 0.
 */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
/* v backs both key and dbuf, so assigning v selects the record used next. */
unsigned int v;
struct tdb_used_record rec;
struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
struct tdb_data dbuf = { (unsigned char *)&v, sizeof(v) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = clash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
/* Chain the logging attribute after the hash attribute. */
hattr.base.next = &tap_log_attr;
/* The two j-loops below add (1 << TDB_HASH_GROUP_BITS) - 1 checks each
 * per flag setting; the trailing + 1 is the final tap_log_messages check. */
plan_tests(sizeof(flags) / sizeof(flags[0])
* (91 + (2 * ((1 << TDB_HASH_GROUP_BITS) - 1))) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct hash_info h;
tdb_off_t new_off, off, subhash;
tdb = tdb_open("run-04-basichash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* --- Step 1: look up key 0 in the empty database. --- */
v = 0;
/* Should not find it. */
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
/* Lock bookkeeping is only checked when TDB_NOLOCK is not set. */
ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* --- Step 2: allocate a record for key 0 and link it into the hash. --- */
/* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
TDB_USED_MAGIC, false);
ok1(!TDB_OFF_IS_ERR(new_off));
/* We should be able to add it now. */
ok1(add_to_hash(tdb, &h, new_off) == 0);
/* Make sure we fill it in for later finding. */
/* Key bytes go right after the record header, data right after the key. */
off = new_off + sizeof(struct tdb_used_record);
ok1(!tdb->methods->twrite(tdb, off, key.dptr, key.dsize));
off += key.dsize;
ok1(!tdb->methods->twrite(tdb, off, dbuf.dptr, dbuf.dsize));
/* We should be able to unlock that OK. */
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Database should be consistent. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* --- Step 3: the same lookup must now return the new record. --- */
/* Now, this should give a successful lookup. */
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
== new_off);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Database should be consistent. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* --- Step 4: look up absent key 1, then expand the top-level group. --- */
/* Test expansion. */
v = 1;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 1. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 1);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* Make it expand 0'th bucket. */
ok1(expand_group(tdb, &h) == 0);
/* First one should be subhash, next should be empty. */
ok1(is_subhash(h.group[0]));
/* Remember the subhash offset for the group_start checks further down. */
subhash = (h.group[0] & TDB_OFF_MASK);
for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(h.group[j] == 0);
/* Write the modified in-memory group back to the database. */
ok1(tdb_write_convert(tdb, h.group_start,
h.group, sizeof(h.group)) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Should be happy with expansion. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* --- Step 5: key 0 must now be found inside the new subhash. --- */
/* Should be able to find it. */
v = 0;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
== new_off);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in expanded group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* --- Step 6: delete key 0 and return its record to the free list. --- */
/* Simple delete should work. */
ok1(delete_from_hash(tdb, &h) == 0);
/* Freed length is the header plus key, data and padding lengths from rec. */
ok1(add_free_record(tdb, new_off,
sizeof(struct tdb_used_record)
+ rec_key_length(&rec)
+ rec_data_length(&rec)
+ rec_extra_padding(&rec),
TDB_LOCK_NOWAIT, false) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* --- Step 7: expand again, this time from inside the subhash. --- */
/* Test second-level expansion: should expand 0th bucket. */
v = 0;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS+TDB_SUBLEVEL_HASH_BITS);
/* Should have lock on bucket 0 */
ok1(h.hlock_start == 0);
ok1(h.hlock_range ==
1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS)));
ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1);
ok1((tdb->flags & TDB_NOLOCK)
|| tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START);
/* FIXME: Check lock length */
ok1(expand_group(tdb, &h) == 0);
/* First one should be subhash, next should be empty. */
ok1(is_subhash(h.group[0]));
/* subhash now refers to the second-level table. */
subhash = (h.group[0] & TDB_OFF_MASK);
for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(h.group[j] == 0);
ok1(tdb_write_convert(tdb, h.group_start,
h.group, sizeof(h.group)) == 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Should be happy with expansion. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* --- Step 8: re-add key 0 two sublevels down and find it again. --- */
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS * 2);
/* We should be able to add it now. */
/* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h,
TDB_USED_MAGIC, false);
ok1(!TDB_OFF_IS_ERR(new_off));
ok1(add_to_hash(tdb, &h, new_off) == 0);
/* Make sure we fill it in for later finding. */
off = new_off + sizeof(struct tdb_used_record);
ok1(!tdb->methods->twrite(tdb, off, key.dptr, key.dsize));
off += key.dsize;
ok1(!tdb->methods->twrite(tdb, off, dbuf.dptr, dbuf.dsize));
/* We should be able to unlock that OK. */
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_WRLCK) == 0);
/* Database should be consistent. */
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Should be able to find it. */
v = 0;
ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL)
== new_off);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in expanded group 0, bucket 0. */
ok1(h.group_start == subhash + sizeof(struct tdb_used_record));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS * 2);
/* NOTE(review): the hash lock taken by the last find_and_lock() is not
 * released before tdb_close() - confirm that this is intentional. */
tdb_close(tdb);
}
/* The whole run must not have produced any log messages. */
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,88 @@
#include <ccan/failtest/failtest_override.h>
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include <ccan/failtest/failtest.h>
#include "logging.h"
#include "failtest_helper.h"
/* While true, suppress_failure() answers FAIL_DONT_FAIL for every call. */
static bool failtest_suppress = false;

/*
 * Failure-injection filter.  Nothing is allowed to fail while
 * failtest_suppress is set; otherwise the decision is delegated to
 * block_repeat_failures().
 */
static enum failtest_result
suppress_failure(struct failtest_call *history, unsigned num)
{
	return failtest_suppress ? FAIL_DONT_FAIL
				 : block_repeat_failures(history, num);
}
/*
 * Read-only open test.  For each flag setting: create a database holding a
 * single record, reopen it with O_RDONLY, then check that tdb_fetch() still
 * works while tdb_store() is refused with TDB_ERR_RDONLY.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data = tdb_mkdata("data", 4), d;
union tdb_attribute seed_attr;
/* Number of log messages expected so far; it is never reset, so it keeps
 * accumulating across loop iterations. */
unsigned int msgs = 0;
failtest_init(argc, argv);
failtest_hook = suppress_failure;
failtest_exit_check = exit_check_log;
/* Create every database with seed 0, chaining the logging attribute. */
seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
seed_attr.base.next = &tap_log_attr;
seed_attr.seed.seed = 0;
/* Start with failure injection off (see suppress_failure()). */
failtest_suppress = true;
/* Eleven ok1() checks per flag setting. */
plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
/* NOTE(review): the result of this first tdb_open() is passed to
 * tdb_store() without a NULL check - confirm that is acceptable. */
tdb = tdb_open("run-05-readonly-open.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &seed_attr);
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
tdb_close(tdb);
/* Let failtest inject failures into the read-only open and the
 * operations that follow it. */
failtest_suppress = false;
tdb = tdb_open("run-05-readonly-open.tdb", flags[i],
O_RDONLY, 0600, &tap_log_attr);
if (!ok1(tdb))
break;
/* Opening read-only must not have logged anything new. */
ok1(tap_log_messages == msgs);
/* Fetch should succeed, stores should fail. */
if (!ok1(tdb_fetch(tdb, key, &d) == 0))
goto fail;
ok1(tdb_deq(d, data));
free(d.dptr);
if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY)
== TDB_ERR_RDONLY))
goto fail;
/* Each refused store is expected to add exactly one log message. */
ok1(tap_log_messages == ++msgs);
if (!ok1(tdb_store(tdb, key, data, TDB_INSERT)
== TDB_ERR_RDONLY))
goto fail;
ok1(tap_log_messages == ++msgs);
/* Injection off again for the consistency check and the close. */
failtest_suppress = true;
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
ok1(tap_log_messages == msgs);
/* SIGH: failtest bug, it doesn't save the tdb file because
 * we have it read-only. If we go around again, it gets
 * changed underneath us and things get screwy. */
if (failtest_has_failed())
break;
}
failtest_exit(exit_status());
/* Error path: stop injecting failures, close the database and exit. */
fail:
failtest_suppress = true;
tdb_close(tdb);
failtest_exit(exit_status());
}

View File

@ -0,0 +1,76 @@
#include <ccan/failtest/failtest_override.h>
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include <ccan/failtest/failtest.h>
#include "logging.h"
#include "failtest_helper.h"
/* While true, suppress_failure() answers FAIL_DONT_FAIL for every call. */
static bool failtest_suppress = false;

/*
 * Failure-injection filter: defer to block_repeat_failures() only when
 * failtest_suppress is clear; otherwise veto the failure outright.
 */
static enum failtest_result
suppress_failure(struct failtest_call *history, unsigned num)
{
	if (!failtest_suppress)
		return block_repeat_failures(history, num);

	return FAIL_DONT_FAIL;
}
/*
 * Simple store test.  For each open mode: TDB_MODIFY of a missing key must
 * give TDB_ERR_NOEXIST, TDB_INSERT must succeed, and a second TDB_INSERT
 * must give TDB_ERR_EXISTS.  Only the tdb_store() calls run with
 * failtest_suppress cleared; the opens and tdb_check() calls run with it set.
 */
int main(int argc, char *argv[])
{
	struct tdb_context *tdb;
	int modes[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };
	const unsigned int nmodes = sizeof(modes) / sizeof(modes[0]);
	struct tdb_data key = tdb_mkdata("key", 3);
	struct tdb_data data = tdb_mkdata("data", 4);
	unsigned int m;

	failtest_init(argc, argv);
	failtest_hook = suppress_failure;
	failtest_exit_check = exit_check_log;
	failtest_suppress = true;

	/* Seven checks per mode plus the final log-message check. */
	plan_tests(nmodes * 7 + 1);

	for (m = 0; m < nmodes; m++) {
		tdb = tdb_open("run-10-simple-store.tdb", modes[m],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		if (!ok1(tdb))
			break;

		/* Nothing stored yet, so a modify has nothing to act on. */
		failtest_suppress = false;
		if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY)
			 == TDB_ERR_NOEXIST))
			goto fail;
		failtest_suppress = true;
		ok1(tdb_check(tdb, NULL, NULL) == 0);

		/* First insert creates the record. */
		failtest_suppress = false;
		if (!ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0))
			goto fail;
		failtest_suppress = true;
		ok1(tdb_check(tdb, NULL, NULL) == 0);

		/* Inserting the same key again must be refused. */
		failtest_suppress = false;
		if (!ok1(tdb_store(tdb, key, data, TDB_INSERT)
			 == TDB_ERR_EXISTS))
			goto fail;
		failtest_suppress = true;
		ok1(tdb_check(tdb, NULL, NULL) == 0);

		tdb_close(tdb);
	}

	ok1(tap_log_messages == 0);
	failtest_exit(exit_status());

fail:
	/* A store gave the wrong answer: close without further injection. */
	failtest_suppress = true;
	tdb_close(tdb);
	failtest_exit(exit_status());
}

View File

@ -0,0 +1,76 @@
#include <ccan/failtest/failtest_override.h>
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include <ccan/failtest/failtest.h>
#include "logging.h"
#include "failtest_helper.h"
static bool failtest_suppress = false;
/* Don't need to test everything here, just want fetch testing. */
static enum failtest_result
suppress_failure(struct failtest_call *history, unsigned num)
{
if (failtest_suppress)
return FAIL_DONT_FAIL;
return block_repeat_failures(history, num);
}
/*
 * Simple fetch test.  For each open mode: fetching a missing key must give
 * TDB_ERR_NOEXIST; after an insert the fetch must succeed and return the
 * stored data.  Only the tdb_fetch() calls run with failtest_suppress
 * cleared.
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };
	struct tdb_data key = tdb_mkdata("key", 3);
	struct tdb_data data = tdb_mkdata("data", 4);

	failtest_init(argc, argv);
	failtest_hook = suppress_failure;
	failtest_exit_check = exit_check_log;

	failtest_suppress = true;
	/* Eight checks per mode plus the final log-message check. */
	plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		tdb = tdb_open("run-11-simple-fetch.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		if (tdb) {
			struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */

			/* fetch should fail. */
			failtest_suppress = false;
			if (!ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST))
				goto fail;
			failtest_suppress = true;
			ok1(tdb_check(tdb, NULL, NULL) == 0);
			/* Insert should succeed. */
			ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
			ok1(tdb_check(tdb, NULL, NULL) == 0);
			/* Fetch should now work. */
			failtest_suppress = false;
			if (!ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS))
				goto fail;
			failtest_suppress = true;
			ok1(tdb_deq(d, data));
			free(d.dptr);
			ok1(tdb_check(tdb, NULL, NULL) == 0);
			tdb_close(tdb);
		}
	}
	ok1(tap_log_messages == 0);
	/* Leave through failtest_exit() on success too, exactly as the
	 * failure path below does, instead of returning from main(). */
	failtest_exit(exit_status());

fail:
	failtest_suppress = true;
	tdb_close(tdb);
	failtest_exit(exit_status());
}

View File

@ -0,0 +1,58 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Hash with a fixed seed: the seed tdb passes in is ignored and the
 * uint64_t that p points at is used instead, so the run reproduces the
 * seed on which a failure was originally seen.
 */
static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
{
	const uint64_t *fixed_seed = p;

	return hash64_stable((const unsigned char *)key, len, *fixed_seed);
}
/*
 * Store test: with the fixed-seed hash, store NUM_RECORDS integer-keyed
 * records in each open mode and check straight after every store that the
 * record can be fetched back with identical contents.
 */
int main(int argc, char *argv[])
{
	enum { NUM_RECORDS = 500 };
	struct tdb_context *tdb;
	uint64_t seed = 16014841315512641303ULL;
	union tdb_attribute fixed_hattr
		= { .hash = { .base = { TDB_ATTRIBUTE_HASH },
			      .fn = fixedhash,
			      .data = &seed } };
	int modes[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };
	const unsigned int nmodes = sizeof(modes) / sizeof(modes[0]);
	unsigned int m, j;
	/* Key and data both alias the record counter j. */
	struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
	struct tdb_data data = { (unsigned char *)&j, sizeof(j) };

	fixed_hattr.base.next = &tap_log_attr;

	/* Per mode: the open check plus three checks per record. */
	plan_tests(nmodes * (1 + NUM_RECORDS * 3) + 1);

	for (m = 0; m < nmodes; m++) {
		tdb = tdb_open("run-12-store.tdb", modes[m],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
		ok1(tdb);
		if (tdb) {
			/* We seemed to lose some keys.
			 * Insert and check they're in there! */
			for (j = 0; j < NUM_RECORDS; j++) {
				/* Bogus GCC warning */
				struct tdb_data d = { NULL, 0 };

				ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
				ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
				ok1(tdb_deq(d, data));
				free(d.dptr);
			}
			tdb_close(tdb);
		}
	}

	ok1(tap_log_messages == 0);
	return exit_status();
}

View File

@ -0,0 +1,207 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Rigged hash function: take the unsigned int at the start of the key and
 * move it up by (64 - TDB_TOPLEVEL_HASH_BITS - 1) bits, so that
 * adjacent-numbered records always clash.  len, seed and priv are ignored.
 */
static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
{
	uint64_t widened = *(const unsigned int *)key;

	widened <<= (64 - TDB_TOPLEVEL_HASH_BITS - 1);
	return widened;
}
/*
 * Hash with a fixed seed: ignore the seed argument and hash the key with
 * the uint64_t found through p, the seed a failure was once seen on.
 */
static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
{
	uint64_t pinned_seed = *(uint64_t *)p;

	return hash64_stable((const unsigned char *)key, len, pinned_seed);
}
/*
 * Store 1000 integer-keyed records (key and data both alias the counter i)
 * and read each one back immediately.
 *
 * Returns true only if every store and fetch succeeded and every fetched
 * value matched what was stored.
 */
static bool store_records(struct tdb_context *tdb)
{
	int i;
	struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
	struct tdb_data d, data = { (unsigned char *)&i, sizeof(i) };

	for (i = 0; i < 1000; i++) {
		bool matches;

		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
			return false;
		/* d is only meaningful if the fetch worked; never compare or
		 * free it otherwise. */
		if (tdb_fetch(tdb, key, &d) != 0)
			return false;
		matches = tdb_deq(d, data);
		/* Release the fetched copy before deciding, so a mismatch
		 * does not leak it. */
		free(d.dptr);
		if (!matches)
			return false;
	}
	return true;
}
/*
 * Run a fixed insert/delete/fetch sequence on the keys val, val + 1 and
 * val + 2, calling tdb_check() after most mutations.  The database is
 * expected to hold none of these keys on entry and holds none on exit.
 *
 * NOTE(review): d is not initialized; the d.dsize checks and free(d.dptr)
 * after each tdb_fetch() rely on the fetch having filled it in - confirm
 * what tdb_fetch() leaves in d on failure.
 */
static void test_val(struct tdb_context *tdb, uint64_t val)
{
/* v backs both key and data, so assigning v selects the record used next. */
uint64_t v;
struct tdb_data key = { (unsigned char *)&v, sizeof(v) };
struct tdb_data d, data = { (unsigned char *)&v, sizeof(v) };
/* Insert an entry, then delete it. */
v = val;
/* Delete should fail. */
ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Insert should succeed. */
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Delete should succeed. */
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Re-add it, then add collision. */
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
v = val + 1;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Can find both? */
/* v is still val + 1 here, so the second record is looked up first. */
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
v = val;
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
/* Delete second one. */
v = val + 1;
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Re-add */
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now, try deleting first one. */
v = val;
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Can still find second? */
v = val + 1;
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
/* Now, this will be ideally placed. */
v = val + 2;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* This will collide with both. */
v = val;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
/* We can still find them all, right? */
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
v = val + 1;
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
v = val + 2;
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
/* And if we delete val + 1, that val + 2 should not move! */
v = val + 1;
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
v = val;
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
v = val + 2;
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == data.dsize);
free(d.dptr);
/* Delete those two, so we are empty. */
/* v is still val + 2 here, so that record is removed first, then val. */
ok1(tdb_delete(tdb, key) == 0);
v = val;
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
uint64_t seed = 16014841315512641303ULL;
union tdb_attribute clash_hattr
= { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = clash } };
union tdb_attribute fixed_hattr
= { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = fixedhash,
.data = &seed } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* These two values gave trouble before. */
int vals[] = { 755, 837 };
clash_hattr.base.next = &tap_log_attr;
fixed_hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0])
* (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-13-delete.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr);
ok1(tdb);
if (!tdb)
continue;
/* Check start of hash table. */
test_val(tdb, 0);
/* Check end of hash table. */
test_val(tdb, -1ULL);
/* Check mixed bitpattern. */
test_val(tdb, 0x123456789ABCDEF0ULL);
ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
&& tdb->file->num_lockrecs == 0));
tdb_close(tdb);
/* Deleting these entries in the db gave problems. */
tdb = tdb_open("run-13-delete.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr);
ok1(tdb);
if (!tdb)
continue;
ok1(store_records(tdb));
ok1(tdb_check(tdb, NULL, NULL) == 0);
for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) {
struct tdb_data key;
key.dptr = (unsigned char *)&vals[j];
key.dsize = sizeof(vals[j]);
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,57 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Check tdb_exists() around stores and deletes of 1000 integer-keyed
 * records (key and data both alias the counter n).
 *
 * Returns false as soon as any existence check or operation gives an
 * unexpected answer, true if everything behaved.
 */
static bool test_records(struct tdb_context *tdb)
{
	int n;
	struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
	struct tdb_data data = { (unsigned char *)&n, sizeof(n) };

	/* Each key is absent, stores cleanly, and is then present. */
	for (n = 0; n < 1000; n++) {
		if (tdb_exists(tdb, key)
		    || tdb_store(tdb, key, data, TDB_REPLACE) != 0
		    || !tdb_exists(tdb, key))
			return false;
	}

	/* Each key is present, deletes cleanly, and is then absent. */
	for (n = 0; n < 1000; n++) {
		if (!tdb_exists(tdb, key)
		    || tdb_delete(tdb, key) != 0
		    || tdb_exists(tdb, key))
			return false;
	}

	return true;
}
/*
 * Exists test: run test_records() once for every open mode in flags[].
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };

	/* Two checks per mode plus the final log-message check. */
	plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		tdb = tdb_open("run-14-exists.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		if (ok1(tdb)) {
			ok1(test_records(tdb));
			/* Only close a handle that was actually opened; a
			 * failed open must not be handed to tdb_close(). */
			tdb_close(tdb);
		}
	}
	ok1(tap_log_messages == 0);
	return exit_status();
}

View File

@ -0,0 +1,135 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include <ccan/ilog/ilog.h>
#include "logging.h"
#define MAX_SIZE 13100
#define SIZE_STEP 131
/*
 * Look up key under a read lock and report what find_and_lock() returned
 * for it.  Returns 0 if find_and_lock() reported an error; in that case no
 * unlock is attempted.
 */
static tdb_off_t tdb_offset(struct tdb_context *tdb, struct tdb_data key)
{
	struct hash_info h;
	struct tdb_used_record rec;
	tdb_off_t where = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);

	if (!TDB_OFF_IS_ERR(where)) {
		/* Drop the hash lock taken by the lookup before returning. */
		tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
		return where;
	}
	return 0;
}
/*
 * Append test.  For every open mode a single record is grown from 0 towards
 * MAX_SIZE bytes in SIZE_STEP increments, first by replacing it with
 * tdb_store() and then by extending it with tdb_append(), counting how often
 * the record's offset changes.  A final pass appends MAX_SIZE bytes to a
 * fresh database in one go.
 */
int main(int argc, char *argv[])
{
	unsigned int i, j, moves;
	struct tdb_context *tdb;
	unsigned char *buffer;
	tdb_off_t oldoff = 0, newoff;
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };
	struct tdb_data key = tdb_mkdata("key", 3);
	struct tdb_data data;

	/* Per mode: two growth passes of 3 + 5 checks per step, then 7
	 * checks for the huge initial append; + 1 for the log check. */
	plan_tests(sizeof(flags) / sizeof(flags[0])
		   * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7)
		   + 1);

	buffer = malloc(MAX_SIZE);
	if (!buffer) {
		/* Without the pattern buffer nothing below can run; report
		 * it as a test failure rather than dereferencing NULL. */
		ok(0, "Could not allocate %d byte test buffer", MAX_SIZE);
		return exit_status();
	}
	/* Fill with a repeating byte pattern to compare fetched data against. */
	for (i = 0; i < MAX_SIZE; i++)
		buffer[i] = i;

	/* Using tdb_store. */
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		tdb = tdb_open("run-append.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		if (!tdb)
			continue;

		moves = 0;
		for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
			data.dptr = buffer;
			data.dsize = j;
			ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
			ok1(tdb_check(tdb, NULL, NULL) == 0);
			ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
			ok1(data.dsize == j);
			ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
			free(data.dptr);
			/* Count every change of the record's offset. */
			newoff = tdb_offset(tdb, key);
			if (newoff != oldoff)
				moves++;
			oldoff = newoff;
		}
		ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
				   && tdb->file->num_lockrecs == 0));
		/* We should increase by 50% each time... */
		ok(moves <= ilog64(j / SIZE_STEP)*2, "Moved %u times", moves);
		tdb_close(tdb);
	}

	/* Using tdb_append. */
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		size_t prev_len = 0;
		tdb = tdb_open("run-append.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		if (!tdb)
			continue;

		moves = 0;
		for (j = 0; j < MAX_SIZE; j += SIZE_STEP) {
			/* Append only the bytes not yet in the record. */
			data.dptr = buffer + prev_len;
			data.dsize = j - prev_len;
			ok1(tdb_append(tdb, key, data) == 0);
			ok1(tdb_check(tdb, NULL, NULL) == 0);
			ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
			ok1(data.dsize == j);
			ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
			free(data.dptr);
			prev_len = data.dsize;
			newoff = tdb_offset(tdb, key);
			if (newoff != oldoff)
				moves++;
			oldoff = newoff;
		}
		ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
				   && tdb->file->num_lockrecs == 0));
		/* We should increase by 50% each time... */
		ok(moves <= ilog64(j / SIZE_STEP)*2, "Moved %u times", moves);
		tdb_close(tdb);
	}

	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		tdb = tdb_open("run-append.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		if (!tdb)
			continue;

		/* Huge initial store. */
		data.dptr = buffer;
		data.dsize = MAX_SIZE;
		ok1(tdb_append(tdb, key, data) == 0);
		ok1(tdb_check(tdb, NULL, NULL) == 0);
		ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
		ok1(data.dsize == MAX_SIZE);
		ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
		free(data.dptr);
		ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0
				   && tdb->file->num_lockrecs == 0));
		tdb_close(tdb);
	}

	ok1(tap_log_messages == 0);
	free(buffer);
	return exit_status();
}

View File

@ -0,0 +1,50 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Store 1000 integer-keyed records, key and data both aliasing the
 * counter.  Returns false at the first failed tdb_store(), true otherwise.
 */
static bool add_records(struct tdb_context *tdb)
{
	int n = 0;
	struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
	struct tdb_data data = { (unsigned char *)&n, sizeof(n) };

	while (n < 1000) {
		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
			return false;
		n++;
	}
	return true;
}
/*
 * Wipe-all test: for each open mode, fill the database with add_records(),
 * wipe it with tdb_wipe_all() and check that tdb_firstkey() then reports
 * TDB_ERR_NOEXIST.
 */
int main(int argc, char *argv[])
{
	struct tdb_context *tdb;
	int modes[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };
	const unsigned int nmodes = sizeof(modes) / sizeof(modes[0]);
	unsigned int m;

	/* Four checks per mode plus the final log-message check. */
	plan_tests(nmodes * 4 + 1);

	for (m = 0; m < nmodes; m++) {
		struct tdb_data key;

		tdb = tdb_open("run-16-wipe_all.tdb", modes[m],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		if (!ok1(tdb))
			continue;

		ok1(add_records(tdb));
		ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
		ok1(tdb_firstkey(tdb, &key) == TDB_ERR_NOEXIST);
		tdb_close(tdb);
	}

	ok1(tap_log_messages == 0);
	return exit_status();
}

View File

@ -0,0 +1,144 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Identity hash: the key's first eight bytes, read as a uint64_t, are the
 * hash value.  len, seed and priv are ignored.
 */
static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv)
{
	const uint64_t *as_hash = key;

	return *as_hash;
}
/*
 * OR a new_bits-wide field holding `new` into *val, placing it directly
 * below the *done bits already filled from the top of the 64-bit word,
 * and advance *done past the new field.
 */
static void add_bits(uint64_t *val, unsigned new, unsigned new_bits,
		     unsigned *done)
{
	unsigned filled = *done + new_bits;

	*val |= (uint64_t)new << (64 - filled);
	*done = filled;
}
/*
 * Build a 64-bit key by packing, from the most significant bit downwards:
 * the top-level group and bucket, then two rounds of sublevel group and
 * bucket.  Group fields are (level hash bits - TDB_HASH_GROUP_BITS) wide,
 * bucket fields are TDB_HASH_GROUP_BITS wide.
 */
static uint64_t make_key(unsigned topgroup, unsigned topbucket,
			 unsigned subgroup1, unsigned subbucket1,
			 unsigned subgroup2, unsigned subbucket2)
{
	const unsigned top_group_bits
		= TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
	const unsigned sub_group_bits
		= TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
	const struct {
		unsigned value;
		unsigned width;
	} fields[] = {
		{ topgroup,   top_group_bits },
		{ topbucket,  TDB_HASH_GROUP_BITS },
		{ subgroup1,  sub_group_bits },
		{ subbucket1, TDB_HASH_GROUP_BITS },
		{ subgroup2,  sub_group_bits },
		{ subbucket2, TDB_HASH_GROUP_BITS },
	};
	uint64_t packed = 0;
	unsigned used = 0;
	unsigned f;

	for (f = 0; f < sizeof(fields) / sizeof(fields[0]); f++)
		add_bits(&packed, fields[f].value, fields[f].width, &used);

	return packed;
}
/*
 * Hash growth test.  With myhash() returning the key itself, make_key()
 * dictates the group and bucket of every record.  For each flag setting the
 * test fills one top-level group, then keeps adding keys that fall into the
 * same buckets and checks (via hash_used) that lookups move first into a
 * subhash and then into a sub-subhash.
 */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
/* kdata backs both key and dbuf, so assigning kdata selects the record. */
uint64_t kdata;
struct tdb_used_record rec;
struct tdb_data key = { (unsigned char *)&kdata, sizeof(kdata) };
struct tdb_data dbuf = { (unsigned char *)&kdata, sizeof(kdata) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = myhash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
/* Chain the logging attribute after the hash attribute. */
hattr.base.next = &tap_log_attr;
/* Per flag setting: 9 checks plus, for each of the
 * (1 << TDB_HASH_GROUP_BITS) buckets, 20 checks and 2 for every
 * iteration of the inner k-loop; + 1 for the final log check. */
plan_tests(sizeof(flags) / sizeof(flags[0])
* (9 + (20 + 2 * ((1 << TDB_HASH_GROUP_BITS) - 2))
* (1 << TDB_HASH_GROUP_BITS)) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct hash_info h;
/* NOTE(review): this database file name is also used by another test
 * program in this tree - confirm the two never run concurrently. */
tdb = tdb_open("run-04-basichash.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Fill a group. */
/* One key per bucket j of top-level group 0. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
kdata = make_key(0, j, 0, 0, 0, 0);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Check first still exists. */
kdata = make_key(0, 0, 0, 0, 0, 0);
ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL) != 0);
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have located space in group 0, bucket 0. */
ok1(h.group_start == offsetof(struct tdb_header, hashtable));
ok1(h.home_bucket == 0);
ok1(h.found_bucket == 0);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS);
/* Entire group should be full! */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(h.group[j] != 0);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_RDLCK) == 0);
/* Now, add one more to each should expand (that) bucket. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
unsigned int k;
/* Same top-level bucket j, but first-sublevel bucket 1. */
kdata = make_key(0, j, 0, 1, 0, 0);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL));
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have moved to subhash */
ok1(h.group_start >= sizeof(struct tdb_header));
ok1(h.home_bucket == 1);
ok1(h.found_bucket == 1);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_RDLCK) == 0);
/* Keep adding, make it expand again. */
/* Occupy the remaining first-sublevel buckets 2 and up. */
for (k = 2; k < (1 << TDB_HASH_GROUP_BITS); k++) {
kdata = make_key(0, j, 0, k, 0, 0);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
/* This should tip it over to sub-sub-hash. */
/* First-sublevel bucket 0 again, but second-sublevel bucket 1. */
kdata = make_key(0, j, 0, 0, 0, 1);
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL));
/* Should have created correct hash. */
ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize));
/* Should have moved to subhash */
ok1(h.group_start >= sizeof(struct tdb_header));
ok1(h.home_bucket == 1);
ok1(h.found_bucket == 1);
ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS
+ TDB_SUBLEVEL_HASH_BITS + TDB_SUBLEVEL_HASH_BITS);
ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
F_RDLCK) == 0);
}
tdb_close(tdb);
}
/* The whole run must not have produced any log messages. */
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,70 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Record-parsing callback: succeeds only when the data handed to it matches
 * *expected according to tdb_deq(); any mismatch is reported as
 * TDB_ERR_EINVAL.  The key argument is not examined.
 */
static enum TDB_ERROR parse(TDB_DATA key, TDB_DATA data, TDB_DATA *expected)
{
	return tdb_deq(data, *expected) ? TDB_SUCCESS : TDB_ERR_EINVAL;
}
/*
 * Record-parsing callback that ignores all of its arguments and always
 * returns the arbitrary value 100, so the caller can check that a callback's
 * return value is passed back unchanged.
 */
static enum TDB_ERROR parse_err(TDB_DATA key, TDB_DATA data, void *unused)
{
	(void)key;
	(void)data;
	(void)unused;

	return 100;
}
/*
 * Store 1000 records (integer n -> integer n), then check that
 * tdb_parse_record() finds each one with the expected contents, reports
 * TDB_ERR_NOEXIST for a key that was never stored, and returns the parse
 * callback's own return value.
 *
 * Returns true when every step behaved as expected, false otherwise.
 */
static bool test_records(struct tdb_context *tdb)
{
	enum { NUM_RECORDS = 1000 };
	int n;
	/* Key and value both alias n: changing n changes the record addressed. */
	struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
	struct tdb_data data = { (unsigned char *)&n, sizeof(n) };

	n = 0;
	while (n < NUM_RECORDS) {
		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
			return false;
		n++;
	}

	n = 0;
	while (n < NUM_RECORDS) {
		if (tdb_parse_record(tdb, key, parse, &data) != TDB_SUCCESS)
			return false;
		n++;
	}

	/* n is now NUM_RECORDS, a key that was never stored. */
	if (tdb_parse_record(tdb, key, parse, &data) != TDB_ERR_NOEXIST)
		return false;

	/* Test error return from parse function. */
	n = 0;
	return tdb_parse_record(tdb, key, parse_err, NULL) == 100;
}
/*
 * Run test_records() against a freshly created database for every flag
 * combination, then check that nothing was logged.
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };

	plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		tdb = tdb_open("run-14-exists.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		/* ok1() has already recorded the failure; with no handle
		 * there is nothing to test and nothing to close. */
		if (!ok1(tdb))
			continue;
		ok1(test_records(tdb));
		tdb_close(tdb);
	}

	ok1(tap_log_messages == 0);
	return exit_status();
}

View File

@ -0,0 +1,121 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Deliberately useless hash function: every key, length and seed hashes
 * to 0, so all records collide.
 */
static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv)
{
	const uint64_t constant_hash = 0;

	(void)key;
	(void)len;
	(void)seed;
	(void)priv;

	return constant_hash;
}
/*
 * Traverse callback.  With a non-NULL p it deletes the record being visited
 * and returns tdb_delete()'s result; with a NULL p it does nothing and
 * returns 0.  The record data (dbuf) is not examined.
 */
static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
{
	return p ? tdb_delete(tdb, key) : 0;
}
/*
 * Hash-overload test.  The database is opened with a hash attribute whose
 * function is badhash (which always returns 0), so every key collides.  For
 * each flag combination records are stored, fetched, traversed and deleted,
 * with tdb_check() run between phases.
 */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
/* key and dbuf both alias j: the current value of j is both key and data. */
struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
struct tdb_data dbuf = { (unsigned char *)&j, sizeof(j) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = badhash } };
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT,
};
/* Chain the logging attribute after the hash attribute. */
hattr.base.next = &tap_log_attr;
plan_tests(6883);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
tdb = tdb_open("run-25-hashoverload.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* Fill a group. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now store one last value: should form chain. */
/* (j is left at 1 << TDB_HASH_GROUP_BITS by the loop above.) */
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Check we can find them all. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS) + 1; j++) {
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
free(d.dptr);
}
/* Now add a *lot* more. */
for (j = (1 << TDB_HASH_GROUP_BITS) + 1;
j < (16 << TDB_HASH_GROUP_BITS);
j++) {
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
free(d.dptr);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Traverse through them. */
/* j is the loop's end value, i.e. the number of keys stored so far. */
ok1(tdb_traverse(tdb, trav, NULL) == j);
/* Empty the first chain-worth. */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++)
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* The remaining keys must still be fetchable. */
for (j = (1 << TDB_HASH_GROUP_BITS);
j < (16 << TDB_HASH_GROUP_BITS);
j++) {
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
free(d.dptr);
}
/* Traverse through them. */
ok1(tdb_traverse(tdb, trav, NULL)
== (15 << TDB_HASH_GROUP_BITS));
/* Re-add */
for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) {
ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0);
}
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now try deleting as we go. */
/* A non-NULL last argument makes trav() delete each record it visits. */
ok1(tdb_traverse(tdb, trav, trav)
== (16 << TDB_HASH_GROUP_BITS));
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Nothing should be left to traverse. */
ok1(tdb_traverse(tdb, trav, NULL) == 0);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,79 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include <err.h>
#include "logging.h"
static bool empty_freetable(struct tdb_context *tdb)
{
struct tdb_freetable ftab;
unsigned int i;
/* Now, free table should be completely exhausted in zone 0 */
if (tdb_read_convert(tdb, tdb->ftable_off, &ftab, sizeof(ftab)) != 0)
abort();
for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) {
if (ftab.buckets[i])
return false;
}
return true;
}
/*
 * For each flag combination: start from an empty free table, expand once to
 * create free space, then insert records until the file's map_size changes.
 * The test checks that the free table was empty immediately before the store
 * that changed the size.
 */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
TDB_DATA k;
uint64_t size;
bool was_empty = false;
/* k aliases j, and is used as both key and data for each store. */
k.dptr = (void *)&j;
k.dsize = sizeof(j);
tdb = tdb_open("run-30-exhaust-before-expand.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
ok1(empty_freetable(tdb));
/* Need some hash lock for expand. */
ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
/* Create some free space. */
ok1(tdb_expand(tdb, 1) == 0);
ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(!empty_freetable(tdb));
size = tdb->file->map_size;
/* Insert minimal-length records until we expand. */
for (j = 0; tdb->file->map_size == size; j++) {
/* Remember the free-table state just before each store. */
was_empty = empty_freetable(tdb);
if (tdb_store(tdb, k, k, TDB_INSERT) != 0)
err(1, "Failed to store record %i", j);
}
/* Would have been empty before expansion, but no longer. */
ok1(was_empty);
ok1(!empty_freetable(tdb));
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,71 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include <ccan/tdb2/transaction.c>
#include "logging.h"
#include "layout.h"
/*
 * Build a database layout with three free tables and four free records,
 * then call get_free() repeatedly and check both which record it returns
 * and which free table tdb->ftable_off points at afterwards.
 */
int main(int argc, char *argv[])
{
tdb_off_t off;
struct tdb_context *tdb;
struct tdb_layout *layout;
TDB_DATA key, data;
plan_tests(11);
key = tdb_mkdata("Hello", 5);
data = tdb_mkdata("world", 5);
/* Create a TDB with three free tables. */
layout = new_tdb_layout(NULL);
tdb_layout_add_freetable(layout);
tdb_layout_add_freetable(layout);
tdb_layout_add_freetable(layout);
tdb_layout_add_free(layout, 80, 0);
/* Used record prevents coalescing. */
tdb_layout_add_used(layout, key, data, 6);
tdb_layout_add_free(layout, 160, 1);
/* Shorten the key so each used record has a distinct key. */
key.dsize--;
tdb_layout_add_used(layout, key, data, 7);
tdb_layout_add_free(layout, 320, 2);
key.dsize--;
tdb_layout_add_used(layout, key, data, 8);
tdb_layout_add_free(layout, 40, 0);
tdb = tdb_layout_get(layout);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* elem[] indices follow the order of the tdb_layout_add_* calls above:
 * 0-2 are the free tables, 3/5/7/9 the free records. */
off = get_free(tdb, 0, 80 - sizeof(struct tdb_used_record), 0,
TDB_USED_MAGIC, 0);
ok1(off == layout->elem[3].base.off);
ok1(tdb->ftable_off == layout->elem[0].base.off);
off = get_free(tdb, 0, 160 - sizeof(struct tdb_used_record), 0,
TDB_USED_MAGIC, 0);
ok1(off == layout->elem[5].base.off);
ok1(tdb->ftable_off == layout->elem[1].base.off);
off = get_free(tdb, 0, 320 - sizeof(struct tdb_used_record), 0,
TDB_USED_MAGIC, 0);
ok1(off == layout->elem[7].base.off);
ok1(tdb->ftable_off == layout->elem[2].base.off);
off = get_free(tdb, 0, 40 - sizeof(struct tdb_used_record), 0,
TDB_USED_MAGIC, 0);
ok1(off == layout->elem[9].base.off);
ok1(tdb->ftable_off == layout->elem[0].base.off);
/* Now we fail. */
off = get_free(tdb, 0, 0, 1, TDB_USED_MAGIC, 0);
ok1(off == 0);
tdb_close(tdb);
tdb_layout_free(layout);
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,75 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Transaction test.  For each flag combination: store a 1000-byte record
 * inside a transaction and cancel it (the record must not exist afterwards),
 * then do the same again but commit (the record must exist afterwards).
 * After each transaction ends the test also checks that no locks remain held.
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	unsigned char *buffer;
	int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
			TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
	struct tdb_data key = tdb_mkdata("key", 3);
	struct tdb_data data;

	buffer = malloc(1000);
	/* Without the reference buffer no test can run; fail loudly rather
	 * than writing through a NULL pointer below. */
	if (!buffer)
		abort();
	for (i = 0; i < 1000; i++)
		buffer[i] = i;

	plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1);

	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		tdb = tdb_open("run-55-transaction.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		if (!tdb)
			continue;

		ok1(tdb_transaction_start(tdb) == 0);
		data.dptr = buffer;
		data.dsize = 1000;
		ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
		ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
		ok1(data.dsize == 1000);
		ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
		free(data.dptr);

		/* Cancelling a transaction means no store */
		tdb_transaction_cancel(tdb);
		ok1(tdb->file->allrecord_lock.count == 0
		    && tdb->file->num_lockrecs == 0);
		ok1(tdb_check(tdb, NULL, NULL) == 0);
		ok1(tdb_fetch(tdb, key, &data) == TDB_ERR_NOEXIST);

		/* Commit the transaction. */
		ok1(tdb_transaction_start(tdb) == 0);
		data.dptr = buffer;
		data.dsize = 1000;
		ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
		ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
		ok1(data.dsize == 1000);
		ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
		free(data.dptr);
		ok1(tdb_transaction_commit(tdb) == 0);
		ok1(tdb->file->allrecord_lock.count == 0
		    && tdb->file->num_lockrecs == 0);
		ok1(tdb_check(tdb, NULL, NULL) == 0);
		ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS);
		ok1(data.dsize == 1000);
		ok1(memcmp(data.dptr, buffer, data.dsize) == 0);
		free(data.dptr);

		tdb_close(tdb);
	}

	ok1(tap_log_messages == 0);
	free(buffer);
	return exit_status();
}

View File

@ -0,0 +1,175 @@
#include "config.h"
#include <unistd.h>
#include "lock-tracking.h"
/* Forward declarations of the checking wrappers, which are defined later in
 * this file. */
static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
static ssize_t write_check(int fd, const void *buf, size_t count);
static int ftruncate_check(int fd, off_t length);
/* Redirect these calls, in everything that follows until the matching
 * #undef lines, to the wrappers (fcntl goes to fcntl_with_lockcheck,
 * presumably declared by lock-tracking.h). */
#define pwrite pwrite_check
#define write write_check
#define fcntl fcntl_with_lockcheck
#define ftruncate ftruncate_check
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include <err.h>
#include "external-agent.h"
#include "logging.h"
/* External agent that check_file_intact() asks to open the database. */
static struct agent *agent;
/* True while main() has the database open; gates the intact-file checks. */
static bool opened;
/* Number of problems recorded by check_file_intact(). */
static int errors = 0;
#define TEST_DBNAME "run-56-open-during-transaction.tdb"
/* Drop the redirections so the wrappers defined below call the real
 * functions rather than themselves. */
#undef write
#undef pwrite
#undef fcntl
#undef ftruncate
/*
 * Compare the first len bytes of snapshot and latest.
 *
 * Returns true when every byte matches (trivially true when len <= 0),
 * false at the first difference.
 */
static bool is_same(const char *snapshot, const char *latest, off_t len)
{
	/* Index with the same type as len: an unsigned int index would wrap
	 * around before reaching a length above UINT_MAX, so the loop could
	 * never terminate. */
	off_t i;

	for (i = 0; i < len; i++) {
		if (snapshot[i] != latest[i])
			return false;
	}
	return true;
}
/*
 * Check that the file behind fd currently holds exactly the snapshot_len
 * bytes found in snapshot.  Returns true on an exact match, false if the
 * file's length or contents differ (or the read fails).
 */
static bool compare_file(int fd, const char *snapshot, off_t snapshot_len)
{
	/* Ask for one byte more than the snapshot: a longer file then yields
	 * a longer read, so the read count doubles as the length check. */
	char *contents = malloc(snapshot_len+1);
	bool same = false;

	if (pread(fd, contents, snapshot_len+1, 0) == snapshot_len)
		same = is_same(snapshot, contents, snapshot_len);

	free(contents);
	return same;
}
/*
 * Snapshot the file behind fd, ask the external agent to open TEST_DBNAME,
 * and verify the agent's open did not alter the file.
 *
 * The agent may either succeed (it is then asked to close again) or report
 * WOULD_HAVE_BLOCKED; any other result, any change to the file, or any
 * failure to take the snapshot is counted in the global `errors`.
 */
static void check_file_intact(int fd)
{
	enum agent_return ret;
	struct stat st;
	char *contents;

	if (fstat(fd, &st) != 0) {
		diag("Stat fail");
		errors++;
		return;
	}

	contents = malloc(st.st_size);
	/* malloc(0) may legitimately return NULL; only a failed non-empty
	 * allocation is an error. */
	if (!contents && st.st_size != 0) {
		diag("Allocation fail");
		errors++;
		return;
	}

	if (pread(fd, contents, st.st_size, 0) != st.st_size) {
		diag("Read fail");
		errors++;
		/* Don't leak the snapshot buffer on the error path. */
		free(contents);
		return;
	}

	/* Ask agent to open file. */
	ret = external_agent_operation(agent, OPEN, TEST_DBNAME);

	/* It's OK to open it, but it must not have changed! */
	if (!compare_file(fd, contents, st.st_size)) {
		diag("Agent changed file after opening %s",
		     agent_return_name(ret));
		errors++;
	}

	if (ret == SUCCESS) {
		ret = external_agent_operation(agent, CLOSE, NULL);
		if (ret != SUCCESS) {
			diag("Agent failed to close tdb: %s",
			     agent_return_name(ret));
			errors++;
		}
	} else if (ret != WOULD_HAVE_BLOCKED) {
		diag("Agent opening file gave %s",
		     agent_return_name(ret));
		errors++;
	}

	free(contents);
}
/*
 * Hook installed as unlock_callback by main(): while the database is open,
 * verify the file is intact.
 */
static void after_unlock(int fd)
{
	if (!opened)
		return;

	check_file_intact(fd);
}
/*
 * pwrite() wrapper: while the database is open, run the intact-file check
 * first; then perform the real pwrite() and return its result.
 */
static ssize_t pwrite_check(int fd,
			    const void *buf, size_t count, off_t offset)
{
	if (opened) {
		check_file_intact(fd);
	}

	return pwrite(fd, buf, count, offset);
}
/*
 * write() wrapper: while the database is open, run the intact-file check
 * first; then perform the real write() and return its result.
 */
static ssize_t write_check(int fd, const void *buf, size_t count)
{
	if (opened) {
		check_file_intact(fd);
	}

	return write(fd, buf, count);
}
/*
 * ftruncate() wrapper: while the database is open, run the intact-file
 * check first; then perform the real ftruncate() and return its result.
 */
static int ftruncate_check(int fd, off_t length)
{
	if (opened) {
		check_file_intact(fd);
	}

	return ftruncate(fd, length);
}
/*
 * For each flag combination, commit a one-record transaction while the
 * write/unlock hooks repeatedly ask an external agent to open the database,
 * and check that none of those opens was recorded as an error.
 */
int main(int argc, char *argv[])
{
	const int flags[] = { TDB_DEFAULT,
			      TDB_NOMMAP,
			      TDB_CONVERT,
			      TDB_CONVERT | TDB_NOMMAP };
	/* Unsigned, to match the sizeof-based loop bound. */
	unsigned int i;
	struct tdb_context *tdb;
	TDB_DATA key, data;

	plan_tests(20);
	agent = prepare_external_agent();
	if (!agent)
		err(1, "preparing agent");

	unlock_callback = after_unlock;
	for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
		diag("Test with %s and %s\n",
		     (flags[i] & TDB_CONVERT) ? "CONVERT" : "DEFAULT",
		     (flags[i] & TDB_NOMMAP) ? "no mmap" : "mmap");
		unlink(TEST_DBNAME);
		tdb = tdb_open(TEST_DBNAME, flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		/* The failure is already recorded; don't pass a NULL handle
		 * to the calls below. */
		if (!tdb)
			continue;

		/* From here on the hooks check the file on every write/unlock. */
		opened = true;
		ok1(tdb_transaction_start(tdb) == 0);
		key = tdb_mkdata("hi", strlen("hi"));
		data = tdb_mkdata("world", strlen("world"));

		ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
		ok1(tdb_transaction_commit(tdb) == 0);
		ok(!errors, "We had %u open errors", errors);

		opened = false;
		tdb_close(tdb);
	}

	/* Release the agent, as the other agent-based tests do. */
	free_external_agent(agent);
	return exit_status();
}

View File

@ -0,0 +1,275 @@
#include "config.h"
#include <unistd.h>
#include "lock-tracking.h"
#include <ccan/tap/tap.h>
#include <stdlib.h>
#include <assert.h>
/* Forward declarations of the wrappers defined later in this file. */
static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
static ssize_t write_check(int fd, const void *buf, size_t count);
static int ftruncate_check(int fd, off_t length);
/* Redirect these calls, until the matching #undef lines, to the wrappers
 * (fcntl goes to fcntl_with_lockcheck, presumably from lock-tracking.h). */
#define pwrite pwrite_check
#define write write_check
#define fcntl fcntl_with_lockcheck
#define ftruncate ftruncate_check
/* There's a malloc inside transaction_setup_recovery, and valgrind complains
 * when we longjmp and leak it. */
/* Capacity of the allocation-tracking table below. */
#define MAX_ALLOCATIONS 200
/* Pointers handed out by malloc_noleak() and not yet freed; NULL = free slot. */
static void *allocated[MAX_ALLOCATIONS];
/*
 * malloc() replacement that records the returned pointer in the first free
 * slot of allocated[], so free_all() can release it later.  Aborts (after a
 * diagnostic) when all MAX_ALLOCATIONS slots are in use.
 */
static void *malloc_noleak(size_t len)
{
	void **slot = allocated;
	void **const end = allocated + MAX_ALLOCATIONS;

	/* Find the first unused slot. */
	while (slot != end && *slot != NULL)
		slot++;

	if (slot == end) {
		diag("Too many allocations!");
		abort();
	}

	*slot = malloc(len);
	return *slot;
}
/*
 * free() replacement: forget p in allocated[] (first matching slot only),
 * then free it.  Pointers that were never recorded are simply freed.
 */
static void free_noleak(void *p)
{
	unsigned int slot = 0;

	/* We don't catch realloc, so don't care if we miss one. */
	while (slot < MAX_ALLOCATIONS && allocated[slot] != p)
		slot++;

	if (slot < MAX_ALLOCATIONS)
		allocated[slot] = NULL;

	free(p);
}
/*
 * Free every pointer still recorded in allocated[] and mark all slots
 * unused.
 */
static void free_all(void)
{
	void **slot;

	for (slot = allocated; slot < allocated + MAX_ALLOCATIONS; slot++) {
		free(*slot);
		*slot = NULL;
	}
}
/* Route malloc/free in the sources included next through the tracking
 * wrappers, so their allocations can be released by free_all(). */
#define malloc malloc_noleak
#define free free_noleak
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
/* End of the redirected region: from here on these names mean the real
 * library functions again. */
#undef malloc
#undef free
#undef write
#undef pwrite
#undef fcntl
#undef ftruncate
#include <stdbool.h>
#include <stdarg.h>
#include <err.h>
#include <setjmp.h>
#include "external-agent.h"
#include "logging.h"
/* True while test_death() is inside the transaction it wants interrupted. */
static bool in_transaction;
/* current counts maybe_die() calls made while in_transaction; the call at
 * which current equals target is the one that "dies". */
static int target, current;
/* Where maybe_die() jumps to; set up by setjmp() in test_death(). */
static jmp_buf jmpbuf;
#define TEST_DBNAME "run-57-die-during-transaction.tdb"
#define KEY_STRING "helloworld"
/*
 * Possible death point.  Outside a transaction this does nothing.  Inside
 * one, each call advances `current`; the call on which the previous value
 * of `current` equals `target` jumps back to the setjmp() in test_death().
 */
static void maybe_die(int fd)
{
	if (!in_transaction)
		return;

	if (current++ != target)
		return;

	longjmp(jmpbuf, 1);
}
/*
 * pwrite() wrapper with a possible death point before the write and, when
 * the write transferred exactly `count` bytes, another one after it.
 * Returns whatever the real pwrite() returned.
 */
static ssize_t pwrite_check(int fd,
			    const void *buf, size_t count, off_t offset)
{
	ssize_t written;

	maybe_die(fd);
	written = pwrite(fd, buf, count, offset);
	if (written == count)
		maybe_die(fd);

	return written;
}
/*
 * write() wrapper with a possible death point before the write and, when
 * the write transferred exactly `count` bytes, another one after it.
 * Returns whatever the real write() returned.
 */
static ssize_t write_check(int fd, const void *buf, size_t count)
{
	ssize_t written;

	maybe_die(fd);
	written = write(fd, buf, count);
	if (written == count)
		maybe_die(fd);

	return written;
}
/*
 * ftruncate() wrapper with a possible death point both before and after the
 * real call (regardless of whether it succeeded).  Returns ftruncate()'s
 * result.
 */
static int ftruncate_check(int fd, off_t length)
{
	int result;

	maybe_die(fd);
	result = ftruncate(fd, length);
	maybe_die(fd);

	return result;
}
/*
 * Simulate this process dying at every possible point during a transaction
 * commit, and check that an external agent can carry on afterwards.
 *
 * Each pass re-creates the database and runs a one-record transaction.  The
 * maybe_die() call numbered `target` longjmps back here; the agent is then
 * asked whether recovery is needed, to perform `op`, to confirm recovery is
 * no longer needed, to check the database and to close it.  `target` is then
 * incremented and the pass repeated, until a transaction completes without
 * being interrupted.
 *
 * Returns true if every pass behaved as expected, false on the first
 * unexpected result (after emitting a diagnostic).
 */
static bool test_death(enum operation op, struct agent *agent)
{
struct tdb_context *tdb = NULL;
TDB_DATA key;
enum agent_return ret;
int needed_recovery = 0;
current = target = 0;
reset:
/* Start every pass from a freshly created database. */
unlink(TEST_DBNAME);
tdb = tdb_open(TEST_DBNAME, TDB_NOMMAP,
O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr);
if (!tdb) {
diag("Failed opening TDB: %s", strerror(errno));
return false;
}
/* setjmp() returns non-zero when maybe_die() has longjmp'd back here. */
if (setjmp(jmpbuf) != 0) {
/* We're partway through. Simulate our death. */
close(tdb->file->fd);
forget_locking();
in_transaction = false;
/* SUCCESS is counted as "recovery was needed"; FAILED is also
 * acceptable here, anything else is an error. */
ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
if (ret == SUCCESS)
needed_recovery++;
else if (ret != FAILED) {
diag("Step %u agent NEEDS_RECOVERY = %s", current,
agent_return_name(ret));
return false;
}
ret = external_agent_operation(agent, op, KEY_STRING);
if (ret != SUCCESS) {
diag("Step %u op %s failed = %s", current,
operation_name(op),
agent_return_name(ret));
return false;
}
/* After the agent's operation, recovery must no longer be needed. */
ret = external_agent_operation(agent, NEEDS_RECOVERY, "");
if (ret != FAILED) {
diag("Still needs recovery after step %u = %s",
current, agent_return_name(ret));
return false;
}
ret = external_agent_operation(agent, CHECK, "");
if (ret != SUCCESS) {
diag("Step %u check failed = %s", current,
agent_return_name(ret));
return false;
}
ret = external_agent_operation(agent, CLOSE, "");
if (ret != SUCCESS) {
diag("Step %u close failed = %s", current,
agent_return_name(ret));
return false;
}
/* Suppress logging as this tries to use closed fd. */
suppress_logging = true;
suppress_lockcheck = true;
tdb_close(tdb);
suppress_logging = false;
suppress_lockcheck = false;
/* Next pass dies one step later. */
target++;
current = 0;
/* Release allocations abandoned by the longjmp. */
free_all();
goto reset;
}
/* Put key for agent to fetch. */
key = tdb_mkdata(KEY_STRING, strlen(KEY_STRING));
if (tdb_store(tdb, key, key, TDB_INSERT) != 0)
return false;
/* This is the key we insert in transaction. */
key.dsize--;
ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
if (ret != SUCCESS)
errx(1, "Agent failed to open: %s", agent_return_name(ret));
ret = external_agent_operation(agent, FETCH, KEY_STRING);
if (ret != SUCCESS)
errx(1, "Agent failed find key: %s", agent_return_name(ret));
/* From here until the commit returns, maybe_die() is armed. */
in_transaction = true;
if (tdb_transaction_start(tdb) != 0)
return false;
if (tdb_store(tdb, key, key, TDB_INSERT) != 0)
return false;
if (tdb_transaction_commit(tdb) != 0)
return false;
in_transaction = false;
/* We made it! */
diag("Completed %u runs", current);
tdb_close(tdb);
ret = external_agent_operation(agent, CLOSE, "");
if (ret != SUCCESS) {
diag("Step %u close failed = %s", current,
agent_return_name(ret));
return false;
}
/* At least one simulated death must have left a recovery to do. */
ok1(needed_recovery);
ok1(locking_errors == 0);
ok1(forget_locking() == 0);
locking_errors = 0;
return true;
}
/*
 * Run test_death() once for each operation the agent performs after our
 * simulated death.
 */
int main(int argc, char *argv[])
{
enum operation ops[] = { FETCH, STORE, TRANSACTION_START };
struct agent *agent;
int i;
/* 3 operations x (1 ok1 here + 3 ok1 in a successful test_death()). */
plan_tests(12);
/* Unlocking is also a possible death point. */
unlock_callback = maybe_die;
agent = prepare_external_agent();
if (!agent)
err(1, "preparing agent");
for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) {
diag("Testing %s after death", operation_name(ops[i]));
ok1(test_death(ops[i], agent));
}
free_external_agent(agent);
return exit_status();
}

View File

@ -0,0 +1,80 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Exercise file offsets at and beyond 0xFFFFFFF0.  The file is grown
 * (sparsely) to just under 4G, free records are added covering that space
 * and a little beyond it, and a record is then stored, located, fetched and
 * deleted.  The whole test is skipped when off_t is 4 bytes or smaller.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* Nothing to test without a 64-bit off_t. */
if (sizeof(off_t) <= 4) {
plan_tests(1);
pass("No 64 bit off_t");
return exit_status();
}
plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
off_t old_size;
TDB_DATA k, d;
struct hash_info h;
struct tdb_used_record rec;
tdb_off_t off;
tdb = tdb_open("run-64-bit-tdb.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
old_size = tdb->file->map_size;
/* This makes a sparse file */
ok1(ftruncate(tdb->file->fd, 0xFFFFFFF0) == 0);
/* Hand everything between the old end and 0xFFFFFFF0 to the free list. */
ok1(add_free_record(tdb, old_size, 0xFFFFFFF0 - old_size,
TDB_LOCK_WAIT, false) == TDB_SUCCESS);
/* Now add a little record past the 4G barrier. */
ok1(tdb_expand_file(tdb, 100) == TDB_SUCCESS);
ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT, false)
== TDB_SUCCESS);
ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
/* Test allocation path. */
/* Lengths 4 and 5 include each string's terminating NUL. */
k = tdb_mkdata("key", 4);
d = tdb_mkdata("data", 5);
ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
/* Make sure it put it at end as we expected. */
off = find_and_lock(tdb, k, F_RDLCK, &h, &rec, NULL);
ok1(off >= 0xFFFFFFF0);
tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
ok1(tdb_fetch(tdb, k, &d) == 0);
ok1(d.dsize == 5);
ok1(strcmp((char *)d.dptr, "data") == 0);
free(d.dptr);
ok1(tdb_delete(tdb, k) == 0);
ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);
tdb_close(tdb);
}
/* We might get messages about mmap failing, so don't test
 * tap_log_messages */
return exit_status();
}

View File

@ -0,0 +1,35 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Check tdb_fd() for every flag combination: -1 for TDB_INTERNAL databases,
 * otherwise a descriptor greater than 2.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-new_database.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
if (!ok1(tdb))
continue;
if (flags[i] & TDB_INTERNAL)
ok1(tdb_fd(tdb) == -1);
else
/* 0-2 are the standard streams, so a newly opened file gets more. */
ok1(tdb_fd(tdb) > 2);
tdb_close(tdb);
ok1(tap_log_messages == 0);
}
return exit_status();
}

View File

@ -0,0 +1,71 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Sequence-number test.  With TDB_SEQNUM set, check the value returned by
 * tdb_get_seqnum() after each store, fetch, append, delete and wipe, and
 * (for non-internal databases) across a committed and a cancelled
 * transaction.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data = tdb_mkdata("data", 4);
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* 15 tests per flag set, plus 13 transaction tests for each of the 4
 * flag sets without TDB_INTERNAL. */
plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-new_database.tdb", flags[i]|TDB_SEQNUM,
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
if (!ok1(tdb))
continue;
ok1(tdb_get_seqnum(tdb) == 0);
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_get_seqnum(tdb) == 1);
/* Fetch doesn't change seqnum */
if (ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS))
free(d.dptr);
ok1(tdb_get_seqnum(tdb) == 1);
ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 2);
ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 3);
/* Empty append works */
ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 4);
ok1(tdb_wipe_all(tdb) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 5);
/* The transaction checks are run only for non-internal databases. */
if (!(flags[i] & TDB_INTERNAL)) {
ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_get_seqnum(tdb) == 6);
ok1(tdb_append(tdb, key, data) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 7);
ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 8);
ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS);
ok1(tdb_get_seqnum(tdb) == 8);
ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_get_seqnum(tdb) == 9);
/* Cancelling must roll the sequence number back too. */
tdb_transaction_cancel(tdb);
ok1(tdb_get_seqnum(tdb) == 8);
}
tdb_close(tdb);
ok1(tap_log_messages == 0);
}
return exit_status();
}

View File

@ -0,0 +1,263 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Lock callback with error injection.
 *
 * _err points at an int.  When that int is non-zero, no lock is attempted:
 * errno is set to its value and -1 is returned.  Otherwise an fcntl() lock
 * of type `rw` is requested over [off, off + len) - waiting when `waitflag`
 * is set, failing immediately otherwise - and retried for as long as the
 * call is interrupted (EINTR).  Returns fcntl()'s result.
 */
static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
		  void *_err)
{
	const int *forced_errno = _err;
	const int cmd = waitflag ? F_SETLKW : F_SETLK;
	struct flock request;
	int rc;

	if (*forced_errno != 0) {
		errno = *forced_errno;
		return -1;
	}

	for (;;) {
		request.l_type = rw;
		request.l_whence = SEEK_SET;
		request.l_start = off;
		request.l_len = len;

		rc = fcntl(fd, cmd, &request);
		if (rc == 0 || errno != EINTR)
			break;
	}
	return rc;
}
/*
 * Unlock callback with error injection.
 *
 * _err points at an int.  When that int is non-zero, nothing is unlocked:
 * errno is set to its value and -1 is returned.  Otherwise the range
 * [off, off + len) is unlocked with fcntl(), retrying for as long as the
 * call is interrupted (EINTR).  The `rw` argument is not used.  Returns
 * fcntl()'s result.
 */
static int myunlock(int fd, int rw, off_t off, off_t len, void *_err)
{
	const int *forced_errno = _err;
	struct flock request;
	int rc;

	if (*forced_errno != 0) {
		errno = *forced_errno;
		return -1;
	}

	for (;;) {
		request.l_type = F_UNLCK;
		request.l_whence = SEEK_SET;
		request.l_start = off;
		request.l_len = len;

		rc = fcntl(fd, F_SETLKW, &request);
		if (rc == 0 || errno != EINTR)
			break;
	}
	return rc;
}
/* Value that trav() copies into the int it is given; set before each
 * traverse in main(). */
static int trav_err;

/*
 * Traverse callback: overwrite *err with the current trav_err and let the
 * traverse continue (returns 0).  The record itself is not examined.
 */
static int trav(struct tdb_context *tdb, TDB_DATA k, TDB_DATA d, int *err)
{
	(void)tdb;
	(void)k;
	(void)d;

	*err = trav_err;
	return 0;
}
/*
 * Lock-attribute test.  The database is opened with custom lock/unlock
 * callbacks (mylock/myunlock) which fail with errno set to lock_err
 * whenever lock_err is non-zero.  For open, store, exists, fetch, delete,
 * chainlock, lockall, traverse and transaction operations the test injects
 * EAGAIN, EINTR and ENOMEM in turn and checks both the returned error and
 * how many messages were logged: none is expected for EAGAIN/EINTR, and at
 * least one for ENOMEM.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
union tdb_attribute lock_attr;
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data = tdb_mkdata("data", 4);
/* Error for the lock callbacks to inject; 0 means "lock normally". */
int lock_err;
lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK;
lock_attr.base.next = &tap_log_attr;
lock_attr.flock.lock = mylock;
lock_attr.flock.unlock = myunlock;
lock_attr.flock.data = &lock_err;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 80);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct tdb_data d;
/* Nonblocking open; expect no error message. */
lock_err = EAGAIN;
tdb = tdb_open("run-82-lockattr.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
ok(errno == lock_err, "Errno is %u", errno);
ok1(!tdb);
ok1(tap_log_messages == 0);
lock_err = EINTR;
tdb = tdb_open("run-82-lockattr.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
ok(errno == lock_err, "Errno is %u", errno);
ok1(!tdb);
ok1(tap_log_messages == 0);
/* Forced fail open. */
lock_err = ENOMEM;
tdb = tdb_open("run-82-lockattr.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
ok1(errno == lock_err);
ok1(!tdb);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
/* With no injected error the open must succeed. */
lock_err = 0;
tdb = tdb_open("run-82-lockattr.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr);
if (!ok1(tdb))
continue;
ok1(tap_log_messages == 0);
/* Nonblocking store. */
lock_err = EAGAIN;
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
/* Nonblocking fetch. */
lock_err = EAGAIN;
ok1(!tdb_exists(tdb, key));
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(!tdb_exists(tdb, key));
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(!tdb_exists(tdb, key));
ok1(tap_log_messages == 1);
tap_log_messages = 0;
lock_err = EAGAIN;
ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
/* Nonblocking delete. */
lock_err = EAGAIN;
ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
/* Nonblocking locks. */
lock_err = EAGAIN;
ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
lock_err = EAGAIN;
ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
lock_err = EAGAIN;
ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_lockall(tdb) == TDB_ERR_LOCK);
/* This actually does divide and conquer. */
ok1(tap_log_messages > 0);
tap_log_messages = 0;
lock_err = EAGAIN;
ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages > 0);
tap_log_messages = 0;
/* Nonblocking traverse; go nonblock partway through. */
/* trav() copies trav_err into lock_err (passed as its last argument),
 * so locking starts failing only once the callback has run. */
lock_err = 0;
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
trav_err = EAGAIN;
ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
trav_err = EINTR;
lock_err = 0;
ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
trav_err = ENOMEM;
lock_err = 0;
ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
/* Nonblocking transactions. */
lock_err = EAGAIN;
ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = EINTR;
ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
lock_err = ENOMEM;
ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
tap_log_messages = 0;
/* Nonblocking transaction prepare. */
lock_err = 0;
ok1(tdb_transaction_start(tdb) == 0);
ok1(tdb_delete(tdb, key) == 0);
lock_err = EAGAIN;
ok1(tdb_transaction_prepare_commit(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
/* A failed prepare must leave the transaction usable: retry and commit. */
lock_err = 0;
ok1(tdb_transaction_prepare_commit(tdb) == 0);
ok1(tdb_transaction_commit(tdb) == 0);
/* And the transaction was committed, right? */
ok1(!tdb_exists(tdb, key));
tdb_close(tdb);
ok1(tap_log_messages == 0);
}
return exit_status();
}

View File

@ -0,0 +1,98 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include <err.h>
#include "external-agent.h"
#include "logging.h"
/*
 * Open hook implementing "clear if first".
 *
 * Every opener holds a read lock on byte 63 of the file.  If a write lock on
 * that byte can be taken without blocking, the file is truncated to zero
 * length.  In either case the function then waits for a read lock on the
 * byte before returning.
 *
 * arg must be this function's own address (a sanity check on the hook's
 * data pointer); otherwise TDB_ERR_CORRUPT is returned.  A failing
 * ftruncate() or final fcntl() yields TDB_ERR_IO; otherwise TDB_SUCCESS.
 */
static enum TDB_ERROR clear_if_first(int fd, void *arg)
{
	/* We hold a lock offset 63 always, so we can tell if anyone is holding it. */
	struct flock marker;

	if (arg != clear_if_first)
		return TDB_ERR_CORRUPT;

	marker.l_whence = SEEK_SET;
	marker.l_start = 63;
	marker.l_len = 1;

	marker.l_type = F_WRLCK;
	if (fcntl(fd, F_SETLK, &marker) == 0) {
		/* We must be first ones to open it! */
		diag("truncating file!");
		if (ftruncate(fd, 0) != 0)
			return TDB_ERR_IO;
	}

	/* Take (or downgrade to) the shared lock that marks the file as open. */
	marker.l_type = F_RDLCK;
	return fcntl(fd, F_SETLKW, &marker) == 0 ? TDB_SUCCESS : TDB_ERR_IO;
}
/* Test the open hook: a database opened with the clear_if_first hook must be
 * emptied when nobody else has it open, and left alone while another opener
 * (us, or the external agent) still holds it.  Runs 13 checks for each flag
 * combination.
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	struct agent *agent;
	union tdb_attribute cif;
	struct tdb_data key = tdb_mkdata("key", 3);
	int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
			TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };

	/* The hook is handed its own address as data so it can sanity-check
	 * that the argument was passed through. */
	cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK;
	cif.openhook.base.next = &tap_log_attr;
	cif.openhook.fn = clear_if_first;
	cif.openhook.data = clear_if_first;

	agent = prepare_external_agent();
	/* Every agent operation below needs a live agent; bail out early
	 * instead of passing a null agent to external_agent_operation(). */
	if (!agent)
		err(1, "preparing agent");

	plan_tests(sizeof(flags) / sizeof(flags[0]) * 13);
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		/* Create it */
		tdb = tdb_open("run-83-openhook.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
		ok1(tdb);
		ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
		tdb_close(tdb);

		/* Now, open with CIF, should clear it. */
		tdb = tdb_open("run-83-openhook.tdb", flags[i],
			       O_RDWR, 0, &cif);
		ok1(tdb);
		ok1(!tdb_exists(tdb, key));
		ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);

		/* Agent should not clear it, since it's still open. */
		ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
					     "run-83-openhook.tdb") == SUCCESS);
		ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS);
		ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);

		/* Still exists for us too. */
		ok1(tdb_exists(tdb, key));

		/* Close it, now agent should clear it. */
		tdb_close(tdb);
		ok1(external_agent_operation(agent, OPEN_WITH_HOOK,
					     "run-83-openhook.tdb") == SUCCESS);
		ok1(external_agent_operation(agent, FETCH, "key") == FAILED);
		ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS);

		ok1(tap_log_messages == 0);
	}

	free_external_agent(agent);
	return exit_status();
}

View File

@ -0,0 +1,165 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Stub lock callback for the FLOCK attribute: takes no lock and always
 * reports success (0).  Only its address matters to the test. */
static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag,
		  void *unused)
{
	(void)fd;
	(void)rw;
	(void)off;
	(void)len;
	(void)waitflag;
	(void)unused;

	return 0;
}
/* Stub unlock callback for the FLOCK attribute: releases nothing and always
 * reports success (0).  Only its address matters to the test. */
static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
{
	(void)fd;
	(void)rw;
	(void)off;
	(void)len;
	(void)unused;

	return 0;
}
/* Stub hash callback for the HASH attribute: ignores every argument and
 * hashes everything to 0.  Only its address matters to the test. */
static uint64_t hash_fn(const void *key, size_t len, uint64_t seed,
			void *priv)
{
	(void)key;
	(void)len;
	(void)seed;
	(void)priv;

	return 0;
}
/* Exercise tdb_get_attribute(), tdb_set_attribute() and
 * tdb_unset_attribute() for each flag combination: first on a database
 * opened with no attributes, then on one opened with the whole
 * hash -> seed -> flock -> log attribute chain built below.
 * Runs 50 checks per flag combination. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
union tdb_attribute seed_attr;
union tdb_attribute hash_attr;
union tdb_attribute lock_attr;
/* Link the attributes into one list:
 * hash_attr -> seed_attr -> lock_attr -> tap_log_attr.
 * Each data pointer is the attribute's own address so it can be
 * recognised when read back. */
hash_attr.base.attr = TDB_ATTRIBUTE_HASH;
hash_attr.base.next = &seed_attr;
hash_attr.hash.fn = hash_fn;
hash_attr.hash.data = &hash_attr;
seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
seed_attr.base.next = &lock_attr;
seed_attr.seed.seed = 100;
lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK;
lock_attr.base.next = &tap_log_attr;
lock_attr.flock.lock = mylock;
lock_attr.flock.unlock = myunlock;
lock_attr.flock.data = &lock_attr;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 50);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
/* Scratch attribute used for every tdb_get_attribute() query. */
union tdb_attribute attr;
/* First open with no attributes. */
tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
ok1(tdb);
/* Get log on no attributes will fail */
attr.base.attr = TDB_ATTRIBUTE_LOG;
ok1(tdb_get_attribute(tdb, &attr) == TDB_ERR_NOEXIST);
/* These always work. */
attr.base.attr = TDB_ATTRIBUTE_HASH;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_HASH);
ok1(attr.hash.fn == jenkins_hash);
attr.base.attr = TDB_ATTRIBUTE_FLOCK;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
ok1(attr.flock.lock == tdb_fcntl_lock);
ok1(attr.flock.unlock == tdb_fcntl_unlock);
attr.base.attr = TDB_ATTRIBUTE_SEED;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_SEED);
/* This is possible, just astronomically unlikely. */
ok1(attr.seed.seed != 0);
/* Unset attributes. */
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
/* Set them. */
ok1(tdb_set_attribute(tdb, &tap_log_attr) == 0);
ok1(tdb_set_attribute(tdb, &lock_attr) == 0);
/* These should fail. */
/* Each rejected set is expected to add exactly one log message. */
ok1(tdb_set_attribute(tdb, &seed_attr) == TDB_ERR_EINVAL);
ok1(tap_log_messages == 1);
ok1(tdb_set_attribute(tdb, &hash_attr) == TDB_ERR_EINVAL);
ok1(tap_log_messages == 2);
tap_log_messages = 0;
/* Getting them should work as expected. */
attr.base.attr = TDB_ATTRIBUTE_LOG;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_LOG);
ok1(attr.log.fn == tap_log_attr.log.fn);
ok1(attr.log.data == tap_log_attr.log.data);
attr.base.attr = TDB_ATTRIBUTE_FLOCK;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
ok1(attr.flock.lock == mylock);
ok1(attr.flock.unlock == myunlock);
ok1(attr.flock.data == &lock_attr);
/* Unset them again. */
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
ok1(tap_log_messages == 0);
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
ok1(tap_log_messages == 0);
tdb_close(tdb);
ok1(tap_log_messages == 0);
/* Now open with all attributes. */
tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hash_attr);
ok1(tdb);
/* Get will succeed */
attr.base.attr = TDB_ATTRIBUTE_LOG;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_LOG);
ok1(attr.log.fn == tap_log_attr.log.fn);
ok1(attr.log.data == tap_log_attr.log.data);
attr.base.attr = TDB_ATTRIBUTE_HASH;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_HASH);
ok1(attr.hash.fn == hash_fn);
ok1(attr.hash.data == &hash_attr);
attr.base.attr = TDB_ATTRIBUTE_FLOCK;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK);
ok1(attr.flock.lock == mylock);
ok1(attr.flock.unlock == myunlock);
ok1(attr.flock.data == &lock_attr);
attr.base.attr = TDB_ATTRIBUTE_SEED;
ok1(tdb_get_attribute(tdb, &attr) == 0);
ok1(attr.base.attr == TDB_ATTRIBUTE_SEED);
ok1(attr.seed.seed == seed_attr.seed.seed);
/* Unset attributes. */
/* Unsetting hash or seed is expected to log one message each;
 * unsetting flock and log is expected to log nothing more. */
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_HASH);
ok1(tap_log_messages == 1);
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_SEED);
ok1(tap_log_messages == 2);
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);
tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG);
ok1(tap_log_messages == 2);
tap_log_messages = 0;
tdb_close(tdb);
}
return exit_status();
}

View File

@ -0,0 +1,59 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Check tdb_get_attribute() with TDB_ATTRIBUTE_STATS, both with a
 * full-sized buffer and with one truncated just after the allocs field.
 * Runs 11 checks per flag combination. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 11);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
union tdb_attribute *attr;
struct tdb_data key = tdb_mkdata("key", 3);
tdb = tdb_open("run-91-get-stats.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
/* Store one record so the counters checked below are non-zero. */
ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0);
/* Use malloc so valgrind will catch overruns. */
attr = malloc(sizeof *attr);
attr->stats.base.attr = TDB_ATTRIBUTE_STATS;
/* Tell the library how much room the caller has provided. */
attr->stats.size = sizeof(*attr);
ok1(tdb_get_attribute(tdb, attr) == 0);
ok1(attr->stats.size == sizeof(*attr));
ok1(attr->stats.allocs > 0);
ok1(attr->stats.expands > 0);
ok1(attr->stats.locks > 0);
free(attr);
/* Try short one. */
/* The buffer ends right after the allocs field, so only fields up
 * to and including allocs may be touched afterwards. */
attr = malloc(offsetof(struct tdb_attribute_stats, allocs)
+ sizeof(attr->stats.allocs));
attr->stats.base.attr = TDB_ATTRIBUTE_STATS;
attr->stats.size = offsetof(struct tdb_attribute_stats, allocs)
+ sizeof(attr->stats.allocs);
ok1(tdb_get_attribute(tdb, attr) == 0);
/* The size field is expected to come back as the full size even
 * though a shorter one was passed in. */
ok1(attr->stats.size == sizeof(*attr));
ok1(attr->stats.allocs > 0);
free(attr);
ok1(tap_log_messages == 0);
tdb_close(tdb);
}
return exit_status();
}

View File

@ -0,0 +1,93 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Check tdb_add_flag() / tdb_remove_flag() for TDB_NOLOCK, TDB_NOMMAP and
 * TDB_NOSYNC.  On TDB_INTERNAL databases each call is expected to log one
 * message; otherwise it is expected to log nothing and the change must be
 * visible through tdb_get_flags() (and, for TDB_NOMMAP, through map_ptr). */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* 87 = 2 internal cases * 9 checks + 4 on-disk cases * 17 checks
 * + the final log-count check after the loop. */
plan_tests(87);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-add-remove-flags.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
ok1(tdb_get_flags(tdb) == tdb->flags);
/* Add each flag in turn; the log counter is reset before every
 * call so each check sees only that call's messages. */
tap_log_messages = 0;
tdb_add_flag(tdb, TDB_NOLOCK);
if (flags[i] & TDB_INTERNAL)
ok1(tap_log_messages == 1);
else {
ok1(tap_log_messages == 0);
ok1(tdb_get_flags(tdb) & TDB_NOLOCK);
}
tap_log_messages = 0;
tdb_add_flag(tdb, TDB_NOMMAP);
if (flags[i] & TDB_INTERNAL)
ok1(tap_log_messages == 1);
else {
ok1(tap_log_messages == 0);
ok1(tdb_get_flags(tdb) & TDB_NOMMAP);
/* With TDB_NOMMAP set there must be no mapping. */
ok1(tdb->file->map_ptr == NULL);
}
tap_log_messages = 0;
tdb_add_flag(tdb, TDB_NOSYNC);
if (flags[i] & TDB_INTERNAL)
ok1(tap_log_messages == 1);
else {
ok1(tap_log_messages == 0);
ok1(tdb_get_flags(tdb) & TDB_NOSYNC);
}
ok1(tdb_get_flags(tdb) == tdb->flags);
/* Now remove the same flags again, in the same order. */
tap_log_messages = 0;
tdb_remove_flag(tdb, TDB_NOLOCK);
if (flags[i] & TDB_INTERNAL)
ok1(tap_log_messages == 1);
else {
ok1(tap_log_messages == 0);
ok1(!(tdb_get_flags(tdb) & TDB_NOLOCK));
}
tap_log_messages = 0;
tdb_remove_flag(tdb, TDB_NOMMAP);
if (flags[i] & TDB_INTERNAL)
ok1(tap_log_messages == 1);
else {
ok1(tap_log_messages == 0);
ok1(!(tdb_get_flags(tdb) & TDB_NOMMAP));
/* Clearing TDB_NOMMAP must bring the mapping back. */
ok1(tdb->file->map_ptr != NULL);
}
tap_log_messages = 0;
tdb_remove_flag(tdb, TDB_NOSYNC);
if (flags[i] & TDB_INTERNAL)
ok1(tap_log_messages == 1);
else {
ok1(tap_log_messages == 0);
ok1(!(tdb_get_flags(tdb) & TDB_NOSYNC));
}
tdb_close(tdb);
}
/* Only messages from the last iteration's final remove can remain,
 * because the counter is reset before every call above. */
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,90 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/open.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#define NUM_RECORDS 1000
/* Fill @tdb with NUM_RECORDS records whose key and value are both the raw
 * bytes of the integers 0 .. NUM_RECORDS-1.
 * Returns false as soon as one store fails, true if all succeed. */
static bool store_records(struct tdb_context *tdb)
{
	int n = 0;
	/* Key and data both point at n, so they follow the counter. */
	struct tdb_data key = { (unsigned char *)&n, sizeof(n) };
	struct tdb_data data = { (unsigned char *)&n, sizeof(n) };

	while (n < NUM_RECORDS) {
		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
			return false;
		n++;
	}
	return true;
}
/* Per-record callback for tdb_check().
 *
 * Verifies that the key is an int in the range 0 .. NUM_RECORDS-1, that the
 * data is byte-identical to the key, and that this value has not been seen
 * before; then marks it as seen.
 *
 * @key:   the record's key.
 * @data:  the record's data.
 * @array: NUM_RECORDS "seen" flags, indexed by record value.
 *
 * Returns TDB_SUCCESS, or TDB_ERR_CORRUPT (after a diagnostic) on any
 * mismatch.
 */
static enum TDB_ERROR check(struct tdb_data key,
			    struct tdb_data data,
			    bool *array)
{
	int val;

	if (key.dsize != sizeof(val)) {
		/* "%u" requires an unsigned int argument; convert explicitly
		 * so the call is well-defined whatever width dsize has. */
		diag("Wrong key size: %u\n", (unsigned int)key.dsize);
		return TDB_ERR_CORRUPT;
	}

	if (key.dsize != data.dsize
	    || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) {
		diag("Key and data differ\n");
		return TDB_ERR_CORRUPT;
	}

	/* Copy out rather than cast: dptr need not be aligned for int. */
	memcpy(&val, key.dptr, sizeof(val));
	if (val >= NUM_RECORDS || val < 0) {
		diag("check value %i\n", val);
		return TDB_ERR_CORRUPT;
	}

	if (array[val]) {
		diag("Value %i already seen\n", val);
		return TDB_ERR_CORRUPT;
	}

	array[val] = true;
	return TDB_SUCCESS;
}
/* Check that tdb_check() invokes its callback exactly once for every
 * record: store NUM_RECORDS records, run tdb_check() with check() marking
 * each value in array[], then confirm every slot was marked.
 * Runs 4 checks per flag combination plus one final log-count check. */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
/* One "seen" flag per stored record value. */
bool array[NUM_RECORDS];
tdb = tdb_open("run-check-callback.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
ok1(store_records(tdb));
for (j = 0; j < NUM_RECORDS; j++)
array[j] = false;
ok1(tdb_check(tdb, check, array) == TDB_SUCCESS);
/* Stop at the first record the callback never saw. */
for (j = 0; j < NUM_RECORDS; j++)
if (!array[j])
break;
ok1(j == NUM_RECORDS);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,45 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Check that a store inside a transaction can grow the database: the
 * map size must be larger than before both while the transaction is open
 * and after it has been committed, and the result must pass tdb_check().
 * Runs 7 checks per flag combination plus one final log-count check. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT,
TDB_CONVERT|TDB_NOSYNC,
TDB_NOMMAP|TDB_CONVERT|TDB_NOSYNC };
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data = tdb_mkdata("data", 4);
plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
/* Map size of the freshly created database, for comparison. */
size_t size;
tdb = tdb_open("run-expand-in-transaction.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
size = tdb->file->map_size;
ok1(tdb_transaction_start(tdb) == 0);
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
/* Grown inside the transaction... */
ok1(tdb->file->map_size > size);
ok1(tdb_transaction_commit(tdb) == 0);
/* ...and still grown once committed. */
ok1(tdb->file->map_size > size);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,70 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/summary.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Check handling of the header's feature fields: after unknown feature
 * bits are written into features_used and features_offered, reopening the
 * database must leave features_offered untouched and reduce features_used
 * to the bits inside TDB_FEATURE_MASK.
 * Runs 8 checks per flag combination plus one final log-count check. */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
/* Key and data both alias j, so each store uses the current j. */
struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
uint64_t features;
tdb = tdb_open("run-features.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
/* Put some stuff in there. */
for (j = 0; j < 100; j++) {
if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
fail("Storing in tdb");
}
/* Mess with features fields in hdr. */
/* Every bit outside TDB_FEATURE_MASK, with bit 0 toggled. */
features = (~TDB_FEATURE_MASK ^ 1);
ok1(tdb_write_convert(tdb, offsetof(struct tdb_header,
features_used),
&features, sizeof(features)) == 0);
ok1(tdb_write_convert(tdb, offsetof(struct tdb_header,
features_offered),
&features, sizeof(features)) == 0);
tdb_close(tdb);
/* Reopen without O_CREAT/O_TRUNC so the doctored header is read. */
tdb = tdb_open("run-features.tdb", flags[i], O_RDWR, 0,
&tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
/* Should not have changed features offered. */
ok1(tdb_read_convert(tdb, offsetof(struct tdb_header,
features_offered),
&features, sizeof(features)) == 0);
ok1(features == (~TDB_FEATURE_MASK ^ 1));
/* Should have cleared unknown bits in features_used. */
ok1(tdb_read_convert(tdb, offsetof(struct tdb_header,
features_used),
&features, sizeof(features)) == 0);
ok1(features == (1 & TDB_FEATURE_MASK));
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,162 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#define NUM_RECORDS 1000
/* Populate @tdb with NUM_RECORDS records; record number k has the raw bytes
 * of the int k as both key and data.
 * Returns true on success, false at the first failing tdb_store(). */
static bool store_records(struct tdb_context *tdb)
{
	int idx;
	/* Both descriptors alias idx and so track it through the loop. */
	struct tdb_data key = { (unsigned char *)&idx, sizeof(idx) };
	struct tdb_data data = { (unsigned char *)&idx, sizeof(idx) };
	bool stored = true;

	for (idx = 0; stored && idx < NUM_RECORDS; idx++) {
		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
			stored = false;
	}
	return stored;
}
/* Accumulator handed to trav() through tdb_traverse(): the value seen on
 * each callback, in call order, plus the number of callbacks so far. */
struct trav_data {
/* Value copied out of each record's data, indexed by callback order. */
unsigned int records[NUM_RECORDS];
/* Number of entries of records[] filled in so far. */
unsigned int calls;
};
/* tdb_traverse() callback: append the int held in @dbuf to the trav_data
 * passed as @p.
 *
 * Returns 0 after recording a value.  Returns 1 without touching the
 * accumulator if the data is not exactly int-sized or records[] is already
 * full, so a misbehaving database cannot overrun val or records[].
 * NOTE(review): a non-zero return is assumed to make tdb_traverse() stop,
 * which leaves td->calls short and fails the caller's count checks; confirm
 * against the tdb_traverse() documentation.
 */
static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p)
{
	struct trav_data *td = p;
	int val;

	if (dbuf.dsize != sizeof(val) || td->calls >= NUM_RECORDS)
		return 1;

	/* Copy out rather than cast: dptr need not be aligned for int. */
	memcpy(&val, dbuf.dptr, sizeof(val));
	td->records[td->calls++] = val;
	return 0;
}
/* Since tdb_nextkey frees dptr, we need to clone it.
 *
 * Returns a copy of @key whose dptr is a fresh malloc()ed duplicate of the
 * original bytes; ownership of that buffer passes to the caller.  Aborts if
 * the allocation fails, since the test cannot continue meaningfully and
 * copying into a null buffer would be undefined behaviour.
 */
static TDB_DATA dup_key(TDB_DATA key)
{
	void *p = malloc(key.dsize);

	/* malloc(0) may legitimately return NULL; only a non-empty key
	 * needs (and can safely perform) the copy. */
	if (key.dsize != 0) {
		if (!p)
			abort();
		memcpy(p, key.dptr, key.dsize);
	}
	key.dptr = p;
	return key;
}
/* Exercise tdb_firstkey()/tdb_nextkey(): on an empty database, with one
 * and two records, and then with NUM_RECORDS records where the iteration
 * order must match tdb_traverse(), must work from an arbitrary starting
 * key, and must survive deleting each key as it is visited. */
int main(int argc, char *argv[])
{
unsigned int i, j;
int num;
struct trav_data td;
TDB_DATA k;
struct tdb_context *tdb;
union tdb_attribute seed_attr;
enum TDB_ERROR ecode;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* Fixed seed, chained in front of the logging attribute. */
seed_attr.base.attr = TDB_ATTRIBUTE_SEED;
seed_attr.base.next = &tap_log_attr;
seed_attr.seed.seed = 6334326220117065685ULL;
/* Per flag combination: 3 checks per record in each of the "match
 * traverse" and "delete" loops, 3 per step of the arbitrary-order
 * loop, and 22 fixed checks; plus the final log-count check. */
plan_tests(sizeof(flags) / sizeof(flags[0])
* (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-traverse.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &seed_attr);
ok1(tdb);
if (!tdb)
continue;
/* Empty database: there is no first key. */
ok1(tdb_firstkey(tdb, &k) == TDB_ERR_NOEXIST);
/* One entry... */
k.dptr = (unsigned char *)&num;
k.dsize = sizeof(num);
num = 0;
ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0);
ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS);
ok1(k.dsize == sizeof(num));
ok1(memcmp(k.dptr, &num, sizeof(num)) == 0);
ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST);
/* Two entries. */
k.dptr = (unsigned char *)&num;
k.dsize = sizeof(num);
num = 1;
ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0);
ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS);
ok1(k.dsize == sizeof(num));
/* Either record may come first; the second must be the other one. */
memcpy(&num, k.dptr, sizeof(num));
ok1(num == 0 || num == 1);
ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS);
ok1(k.dsize == sizeof(j));
memcpy(&j, k.dptr, sizeof(j));
ok1(j == 0 || j == 1);
ok1(j != num);
ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST);
/* Clean up. */
k.dptr = (unsigned char *)&num;
k.dsize = sizeof(num);
num = 0;
ok1(tdb_delete(tdb, k) == 0);
num = 1;
ok1(tdb_delete(tdb, k) == 0);
/* Now lots of records. */
ok1(store_records(tdb));
/* Record the traverse order in td for the comparisons below. */
td.calls = 0;
num = tdb_traverse(tdb, trav, &td);
ok1(num == NUM_RECORDS);
ok1(td.calls == NUM_RECORDS);
/* Simple loop should match tdb_traverse */
for (j = 0, ecode = tdb_firstkey(tdb, &k); j < td.calls; j++) {
int val;
ok1(ecode == TDB_SUCCESS);
ok1(k.dsize == sizeof(val));
memcpy(&val, k.dptr, k.dsize);
ok1(td.records[j] == val);
ecode = tdb_nextkey(tdb, &k);
}
/* But arbitrary orderings should work too. */
/* Walk backwards: starting from a private copy of record j-1's key,
 * tdb_nextkey() must hand back record j's key. */
for (j = td.calls-1; j > 0; j--) {
k.dptr = (unsigned char *)&td.records[j-1];
k.dsize = sizeof(td.records[j-1]);
k = dup_key(k);
ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS);
ok1(k.dsize == sizeof(td.records[j]));
ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0);
free(k.dptr);
}
/* Even delete should work. */
for (j = 0, ecode = tdb_firstkey(tdb, &k);
ecode != TDB_ERR_NOEXIST;
j++) {
ok1(ecode == TDB_SUCCESS);
ok1(k.dsize == 4);
ok1(tdb_delete(tdb, k) == 0);
ecode = tdb_nextkey(tdb, &k);
}
diag("delete using first/nextkey gave %u of %u records",
j, NUM_RECORDS);
ok1(j == NUM_RECORDS);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,180 @@
/* Test forking while holding lock.
*
* There are only five ways to do this currently:
* (1) grab a tdb_chainlock, then fork.
* (2) grab a tdb_lockall, then fork.
* (3) grab a tdb_lockall_read, then fork.
* (4) start a transaction, then fork.
* (5) fork from inside a tdb_parse() callback.
*
* Note that we don't hold a lock across tdb_traverse callbacks, so
* that doesn't matter.
*/
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "logging.h"
/* tdb_parse_record() callback that forks while the parse is in progress.
 *
 * The child expects both a store and a fetch to fail with TDB_ERR_LOCK and
 * to have logged exactly two messages, closes the database, expects the log
 * count to be unchanged, and reports the outcome through its exit code
 * (0 on success, 1/2/3 identifying the failed expectation).  The parent
 * waits for the child and records one TAP check on its exit status.
 *
 * Always returns TDB_SUCCESS to the caller in the parent.
 */
static enum TDB_ERROR fork_in_parse(TDB_DATA key, TDB_DATA data,
				    struct tdb_context *tdb)
{
	int status;

	if (fork() != 0) {
		/* Parent side: collect the child's verdict. */
		wait(&status);
		ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
		return TDB_SUCCESS;
	}

	/* Child side.  We expect this to fail. */
	if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK
	    || tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
		exit(1);
	if (tap_log_messages != 2)
		exit(2);

	tdb_close(tdb);
	exit(tap_log_messages != 2 ? 3 : 0);
}
/* Fork while holding each kind of lock in turn (chainlock, lockall,
 * lockall_read, an open transaction, and from inside a tdb_parse_record()
 * callback).  In every case the child expects its own store and fetch to
 * fail with TDB_ERR_LOCK and returns a distinct non-zero code from main
 * for whichever expectation was not met; the parent checks that the child
 * exited with status 0.  Runs 14 checks per flag combination, one of which
 * is made inside fork_in_parse(). */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data = tdb_mkdata("data", 4);
plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
int status;
tap_log_messages = 0;
tdb = tdb_open("run-fork-test.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
if (!ok1(tdb))
continue;
/* Put a record in here. */
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_SUCCESS);
/* Case 1: fork while holding a chain lock. */
ok1(tdb_chainlock(tdb, key) == TDB_SUCCESS);
if (fork() == 0) {
/* We expect this to fail. */
if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
return 1;
if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
return 1;
if (tap_log_messages != 2)
return 2;
/* Unlocking in the child is expected to log one more message. */
tdb_chainunlock(tdb, key);
if (tap_log_messages != 3)
return 3;
tdb_close(tdb);
if (tap_log_messages != 3)
return 4;
return 0;
}
wait(&status);
ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
tdb_chainunlock(tdb, key);
/* Case 2: fork while holding the write allrecord lock. */
ok1(tdb_lockall(tdb) == TDB_SUCCESS);
if (fork() == 0) {
/* We expect this to fail. */
if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
return 1;
if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
return 1;
if (tap_log_messages != 2)
return 2;
/* Here unlocking and closing are expected to log nothing more. */
tdb_unlockall(tdb);
if (tap_log_messages != 2)
return 3;
tdb_close(tdb);
if (tap_log_messages != 2)
return 4;
return 0;
}
wait(&status);
ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
tdb_unlockall(tdb);
/* Case 3: fork while holding the read allrecord lock. */
ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
if (fork() == 0) {
/* We expect this to fail. */
/* This would always fail anyway... */
if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
return 1;
if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
return 1;
if (tap_log_messages != 2)
return 2;
tdb_unlockall_read(tdb);
if (tap_log_messages != 2)
return 3;
tdb_close(tdb);
if (tap_log_messages != 2)
return 4;
return 0;
}
wait(&status);
ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
tdb_unlockall_read(tdb);
/* Case 4: fork with a transaction open. */
ok1(tdb_transaction_start(tdb) == TDB_SUCCESS);
/* If transactions is empty, noop "commit" succeeds. */
ok1(tdb_delete(tdb, key) == TDB_SUCCESS);
if (fork() == 0) {
/* We expect this to fail. */
if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK)
return 1;
if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK)
return 1;
if (tap_log_messages != 2)
return 2;
if (tdb_transaction_commit(tdb) != TDB_ERR_LOCK)
return 3;
tdb_close(tdb);
/* Only a lower bound is checked on the log count here. */
if (tap_log_messages < 3)
return 4;
return 0;
}
wait(&status);
ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0);
/* The parent abandons the delete made inside the transaction. */
tdb_transaction_cancel(tdb);
/* Case 5: fork from inside a parse callback. */
ok1(tdb_parse_record(tdb, key, fork_in_parse, tdb)
== TDB_SUCCESS);
tdb_close(tdb);
/* All the log messages counted above were in the children. */
ok1(tap_log_messages == 0);
}
return exit_status();
}

View File

@ -0,0 +1,80 @@
#include "config.h"
#include <unistd.h>
#include "lock-tracking.h"
#define fcntl fcntl_with_lockcheck
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include <err.h>
#include "external-agent.h"
#include "logging.h"
#define TEST_DBNAME "run-lockall.tdb"
#undef fcntl
/* Check tdb_lockall() and tdb_lockall_read() against a second process.
 * While we hold the write allrecord lock the agent's STORE and FETCH are
 * both expected to report WOULD_HAVE_BLOCKED; while we hold the read
 * allrecord lock only its STORE is.  Both lock kinds are also taken twice
 * to check nesting.  Runs 13 checks per flag combination plus one final
 * log-count check. */
int main(int argc, char *argv[])
{
struct agent *agent;
const int flags[] = { TDB_DEFAULT,
TDB_NOMMAP,
TDB_CONVERT,
TDB_CONVERT | TDB_NOMMAP };
int i;
plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1);
agent = prepare_external_agent();
if (!agent)
err(1, "preparing agent");
for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) {
enum agent_return ret;
struct tdb_context *tdb;
tdb = tdb_open(TEST_DBNAME, flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
ret = external_agent_operation(agent, OPEN, TEST_DBNAME);
ok1(ret == SUCCESS);
/* Write lock: the agent can neither store nor fetch. */
ok1(tdb_lockall(tdb) == TDB_SUCCESS);
ok1(external_agent_operation(agent, STORE, "key")
== WOULD_HAVE_BLOCKED);
ok1(external_agent_operation(agent, FETCH, "key")
== WOULD_HAVE_BLOCKED);
/* Test nesting. */
ok1(tdb_lockall(tdb) == TDB_SUCCESS);
tdb_unlockall(tdb);
tdb_unlockall(tdb);
/* Both levels released: the agent can store again. */
ok1(external_agent_operation(agent, STORE, "key") == SUCCESS);
/* Read lock: the agent can fetch but not store. */
ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
ok1(external_agent_operation(agent, STORE, "key")
== WOULD_HAVE_BLOCKED);
ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS);
/* Nesting again, this time with the read lock. */
ok1(tdb_lockall_read(tdb) == TDB_SUCCESS);
tdb_unlockall_read(tdb);
tdb_unlockall_read(tdb);
ok1(external_agent_operation(agent, STORE, "key") == SUCCESS);
ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS);
tdb_close(tdb);
}
free_external_agent(agent);
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,192 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tdb2/check.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#include "external-agent.h"
#undef alarm
#define alarm fast_alarm
/* Speed things up by doing things in milliseconds. */
/* Replacement for alarm() that counts in milliseconds instead of seconds.
 *
 * Arms a one-shot ITIMER_REAL timer that expires after @milli_seconds
 * (0 disarms it).  Always returns 0; unlike alarm() it does not report the
 * time that was left on a previous timer.
 */
static unsigned int fast_alarm(unsigned int milli_seconds)
{
	struct itimerval it;

	it.it_interval.tv_sec = it.it_interval.tv_usec = 0;
	it.it_value.tv_sec = milli_seconds / 1000;
	/* Only the sub-second remainder belongs in tv_usec: it must stay
	 * below one million or setitimer() rejects the value. */
	it.it_value.tv_usec = (milli_seconds % 1000) * 1000;
	setitimer(ITIMER_REAL, &it, NULL);
	return 0;
}
#define CatchSignal(sig, handler) signal((sig), (handler))
/* Signal handler that deliberately ignores the signal: installing it lets
 * a signal interrupt a system call without terminating the process. */
static void do_nothing(int signum)
{
	(void)signum;
}
/* This example code is taken from SAMBA, so try not to change it. */
static struct flock flock_struct;
/* Return a value which is none of v1, v2 or v3. */
/* Pick a short that differs from all of @v1, @v2 and @v3.
 *
 * Starts from a value derived from the sum of the three inputs and steps
 * upwards by one until it collides with none of them. */
static inline short int invalid_value(short int v1, short int v2, short int v3)
{
	const int sum = v1 + v2 + v3;
	short int candidate = sum ^ (sum << 16);

	for (;;) {
		if (candidate != v1 && candidate != v2 && candidate != v3)
			return candidate;
		candidate++;
	}
}
/* We invalidate in as many ways as we can, so the OS rejects it */
/* Signal handler; timeout_lock() installs it for SIGALRM.  It overwrites
 * every field of the shared flock_struct with a value chosen to be invalid,
 * which is also how timeout_lock() detects that the alarm has fired. */
static void invalidate_flock_struct(int signum)
{
/* A lock type and a whence that match none of the valid constants. */
flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK);
flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END);
flock_struct.l_start = -1;
/* A large negative. */
/* Built by shifting 1 into the top bit of off_t and adding one. */
flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1);
}
/* Lock callback for TDB_ATTRIBUTE_FLOCK that gives up after a timeout.
 *
 * @fd, @rw, @off, @len: descriptor, lock type and byte range to lock.
 * @waitflag: use the blocking F_SETLKW if true, F_SETLK otherwise.
 * @_timeout: points to an unsigned int handed to alarm() (this file
 *            redefines alarm as fast_alarm, which counts milliseconds).
 *
 * Returns the result of the last fcntl() call: 0 once the lock is held,
 * otherwise its failure value with errno set to EINTR if the alarm fired
 * or to the fcntl() error if something else went wrong.  On success errno
 * is restored to the value it had on entry.
 */
static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
void *_timeout)
{
int ret, saved_errno = errno;
unsigned int timeout = *(unsigned int *)_timeout;
flock_struct.l_type = rw;
flock_struct.l_whence = SEEK_SET;
flock_struct.l_start = off;
flock_struct.l_len = len;
/* When the alarm fires the handler scribbles over flock_struct. */
CatchSignal(SIGALRM, invalidate_flock_struct);
alarm(timeout);
for (;;) {
if (waitflag)
ret = fcntl(fd, F_SETLKW, &flock_struct);
else
ret = fcntl(fd, F_SETLK, &flock_struct);
if (ret == 0)
break;
/* Not signalled? Something else went wrong. */
/* l_len is still what we set, so the SIGALRM handler has not run. */
if (flock_struct.l_len == len) {
/* Contention or an unrelated signal: keep trying until the
 * lock is granted or the alarm fires. */
if (errno == EAGAIN || errno == EINTR)
continue;
saved_errno = errno;
break;
} else {
/* The handler ran: report the timeout as EINTR. */
saved_errno = EINTR;
break;
}
}
/* Cancel any alarm still pending. */
alarm(0);
errno = saved_errno;
return ret;
}
/* Take the chain lock for @key, optionally giving up after a timeout.
 *
 * @tdb:     database to lock.
 * @key:     key whose chain is locked.
 * @timeout: if non-zero, timeout_lock() is installed as the lock function
 *           for the duration of the call, with this value as its data.
 * @rw_type: F_RDLCK for a read lock; anything else takes a write lock.
 *
 * Returns the TDB error code of the first step that failed (fetching or
 * setting the lock attribute), or otherwise that of the lock attempt.
 * When a timeout was requested the FLOCK attribute is unset again after
 * the lock attempt, whether or not it succeeded.
 */
static int tdb_chainlock_with_timeout_internal(struct tdb_context *tdb,
					       TDB_DATA key,
					       unsigned int timeout,
					       int rw_type)
{
	const bool timed = (timeout != 0);
	union tdb_attribute locking;
	enum TDB_ERROR ecode;

	if (timed) {
		/* Start from the current settings so the unlock function is
		 * kept as it was. */
		locking.base.attr = TDB_ATTRIBUTE_FLOCK;
		ecode = tdb_get_attribute(tdb, &locking);
		if (ecode == TDB_SUCCESS) {
			/* Replace locking function with our own. */
			locking.flock.lock = timeout_lock;
			locking.flock.data = &timeout;
			ecode = tdb_set_attribute(tdb, &locking);
		}
		if (ecode != TDB_SUCCESS)
			return ecode;
	}

	ecode = (rw_type == F_RDLCK) ? tdb_chainlock_read(tdb, key)
				     : tdb_chainlock(tdb, key);

	if (timed)
		tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK);

	return ecode;
}
/* Check chain locking with a timeout.  Uncontended read and write locks
 * must succeed; once the external agent has started a transaction, a
 * write lock attempt must fail with TDB_ERR_LOCK without logging, even
 * when a different signal is sent to us as well.
 * The timeout of 20 is in milliseconds, because this file redefines alarm
 * as fast_alarm.  Runs 15 checks per flag combination. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
TDB_DATA key = tdb_mkdata("hello", 5);
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
struct agent *agent;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 15);
agent = prepare_external_agent();
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
enum TDB_ERROR ecode;
tdb = tdb_open("run-locktimeout.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
/* Unlike the sibling tests, an open failure ends the whole run. */
if (!ok1(tdb))
break;
/* Simple cases: should succeed. */
ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
F_RDLCK);
ok1(ecode == TDB_SUCCESS);
ok1(tap_log_messages == 0);
tdb_chainunlock_read(tdb, key);
ok1(tap_log_messages == 0);
ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
F_WRLCK);
ok1(ecode == TDB_SUCCESS);
ok1(tap_log_messages == 0);
tdb_chainunlock(tdb, key);
ok1(tap_log_messages == 0);
/* OK, get agent to start transaction, then we should time out. */
ok1(external_agent_operation(agent, OPEN, "run-locktimeout.tdb")
== SUCCESS);
ok1(external_agent_operation(agent, TRANSACTION_START, "")
== SUCCESS);
ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
F_WRLCK);
ok1(ecode == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
/* Even if we get a different signal, should be fine. */
CatchSignal(SIGUSR1, do_nothing);
/* The result of SEND_SIGNAL is not checked. */
external_agent_operation(agent, SEND_SIGNAL, "");
ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20,
F_WRLCK);
ok1(ecode == TDB_ERR_LOCK);
ok1(tap_log_messages == 0);
ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "")
== SUCCESS);
ok1(external_agent_operation(agent, CLOSE, "")
== SUCCESS);
tdb_close(tdb);
}
free_external_agent(agent);
return exit_status();
}

View File

@ -0,0 +1,48 @@
/* Another test revealed that we lost an entry. This reproduces it. */
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#define NUM_RECORDS 1189
/* We use the same seed which we saw this failure on.
 *
 * Hash callback that ignores the seed it is given and always hashes @len
 * bytes of @key with the fixed seed from the original failing run. */
static uint64_t failhash(const void *key, size_t len, uint64_t seed, void *p)
{
	const uint64_t failing_seed = 699537674708983027ULL;

	(void)seed;
	(void)p;

	return hash64_stable((const unsigned char *)key, len, failing_seed);
}
/* Regression test for a lost entry: using the failhash() hash function,
 * store NUM_RECORDS records into an internal database and run tdb_check()
 * after every single store.  Runs one check for the open, two per record,
 * and one final log-count check. */
int main(int argc, char *argv[])
{
int i;
struct tdb_context *tdb;
/* Key and data both alias i, so each store uses the loop counter. */
struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = failhash } };
/* Chain the logging attribute after the hash attribute. */
hattr.base.next = &tap_log_attr;
plan_tests(1 + 2 * NUM_RECORDS + 1);
tdb = tdb_open("run-missing-entries.tdb", TDB_INTERNAL,
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (tdb) {
for (i = 0; i < NUM_RECORDS; i++) {
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,84 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Open the same database file twice within one process and check that
 * the two handles see each other's changes, that closing one leaves the
 * other usable, and that while one handle has a transaction open every
 * operation on the other fails with TDB_ERR_LOCK and logs one message.
 * Runs 28 checks per flag combination. */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb, *tdb2;
/* Key and data both alias the loop counter i. */
struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */
int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };
plan_tests(sizeof(flags) / sizeof(flags[0]) * 28);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-open-multiple-times.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
/* Second handle on the same file; no O_TRUNC this time. */
tdb2 = tdb_open("run-open-multiple-times.tdb", flags[i],
O_RDWR|O_CREAT, 0600, &tap_log_attr);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(tdb_check(tdb2, NULL, NULL) == 0);
/* Store in one, fetch in the other. */
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0);
ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS);
ok1(tdb_deq(d, data));
free(d.dptr);
/* Vice versa, with delete. */
ok1(tdb_delete(tdb2, key) == 0);
ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST);
/* OK, now close first one, check second still good. */
ok1(tdb_close(tdb) == 0);
ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == 0);
ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS);
ok1(tdb_deq(d, data));
free(d.dptr);
/* Reopen */
tdb = tdb_open("run-open-multiple-times.tdb", flags[i],
O_RDWR|O_CREAT, 0600, &tap_log_attr);
ok1(tdb);
ok1(tdb_transaction_start(tdb2) == 0);
/* Anything in the other one should fail. */
/* Each refused operation is expected to log exactly one message. */
ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK);
ok1(tap_log_messages == 1);
ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK);
ok1(tap_log_messages == 2);
ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK);
ok1(tap_log_messages == 3);
ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK);
ok1(tap_log_messages == 4);
/* Transaction should work as normal. */
ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == TDB_SUCCESS);
/* Now... try closing with locks held. */
ok1(tdb_close(tdb2) == 0);
/* With tdb2 closed, the first handle must be usable again. */
ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS);
ok1(tdb_deq(d, data));
free(d.dptr);
ok1(tdb_close(tdb) == 0);
/* Nothing beyond the four expected refusals may have been logged. */
ok1(tap_log_messages == 4);
tap_log_messages = 0;
}
return exit_status();
}

View File

@ -0,0 +1,53 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#define MAX_SIZE 10000
#define SIZE_STEP 131
/*
 * Inserts one record with empty data and then repeatedly overwrites it
 * with progressively larger data (0, SIZE_STEP, 2*SIZE_STEP, ... below
 * MAX_SIZE), running tdb_check after every store.
 *
 * TAP plan, per flag combination: 3 (open, insert, check) plus 2 per size
 * step (modify, check), where there are 1 + MAX_SIZE/SIZE_STEP steps;
 * plus 1 final check of the log message count.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data;
/* One buffer of the maximum size is reused for every store. */
/* NOTE(review): the malloc result is used by memset without a NULL check. */
data.dptr = malloc(MAX_SIZE);
memset(data.dptr, 0x24, MAX_SIZE);
plan_tests(sizeof(flags) / sizeof(flags[0])
* (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-record-expand.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
/* Start from a record that has a key but zero bytes of data. */
data.dsize = 0;
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
for (data.dsize = 0;
data.dsize < MAX_SIZE;
data.dsize += SIZE_STEP) {
/* Fill the live part of the buffer with a byte derived from the size
 * (memset converts its fill argument to unsigned char). */
memset(data.dptr, data.dsize, data.dsize);
ok1(tdb_store(tdb, key, data, TDB_MODIFY) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
free(data.dptr);
return exit_status();
}

View File

@ -0,0 +1,65 @@
/* We had a bug where we marked the tdb read-only for a tdb_traverse_read.
* If we then expanded the tdb, we would remap read-only, and later SEGV. */
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "external-agent.h"
#include "logging.h"
/*
 * Reports whether the file open on @fd no longer has the size @size.
 *
 * Despite the name this is an inequality test, not a strict "greater than".
 * If fstat() fails, true is returned: the stat buffer would otherwise be
 * read uninitialised, and a caller polling for growth stops looping
 * instead of spinning on garbage.
 */
static bool file_larger(int fd, tdb_len_t size)
{
	struct stat st;

	if (fstat(fd, &st) != 0)
		return true;
	return st.st_size != size;
}
/*
 * Has the external agent store records keyed "0", "1", "2", ... until
 * file_larger() reports that the file on @fd no longer has size @size.
 *
 * Returns the number of records stored, or 0 as soon as any agent STORE
 * does not report SUCCESS.
 */
static unsigned add_records_to_grow(struct agent *agent, int fd, tdb_len_t size)
{
	unsigned int i;

	for (i = 0; !file_larger(fd, size); i++) {
		char data[20];

		/* %u matches the unsigned counter; snprintf keeps the write bounded. */
		snprintf(data, sizeof(data), "%u", i);
		if (external_agent_operation(agent, STORE, data) != SUCCESS)
			return 0;
	}
	diag("Added %u records to grow file", i);
	return i;
}
/*
 * Creates a database, lets an external agent open the same file and store
 * records until the file size differs from this handle's recorded
 * map_size, then traverses and stores through this (now stale) handle.
 *
 * TAP plan: 4 checks (agent open, traverse count, store, no log messages).
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct agent *agent;
struct tdb_context *tdb;
struct tdb_data d = tdb_mkdata("hello", 5);
const char filename[] = "run-remap-in-read_traverse.tdb";
plan_tests(4);
agent = prepare_external_agent();
/* NOTE(review): tdb is dereferenced below without a NULL check. */
tdb = tdb_open(filename, TDB_DEFAULT,
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS);
/* Grow the file behind this handle's back; i is how many records the agent added. */
i = add_records_to_grow(agent, tdb->file->fd, tdb->file->map_size);
/* Do a traverse. */
/* NOTE(review): the file comment describes tdb_traverse_read, but tdb_traverse is what is called here. */
ok1(tdb_traverse(tdb, NULL, NULL) == i);
/* Now store something! */
ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0);
ok1(tap_log_messages == 0);
tdb_close(tdb);
free_external_agent(agent);
return exit_status();
}

67
lib/tdb2/test/run-seed.c Normal file
View File

@ -0,0 +1,67 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/* Number of times tdb has invoked my_log_fn. */
static int log_count;

/*
 * Log callback that only counts invocations.  Normally tdb logs when it
 * picks a random seed, so a count of zero shows the supplied seed was used.
 */
static void my_log_fn(struct tdb_context *tdb,
		      enum tdb_log_level level,
		      const char *message, void *priv)
{
	++log_count;
}

/* Log attribute routing every message to the counter above. */
static union tdb_attribute log_attr = {
	.log.base.attr = TDB_ATTRIBUTE_LOG,
	.log.fn = my_log_fn,
};
/*
 * Opens a database with an explicit seed attribute of 42 and checks that
 * the context reports that seed, that the log callback was never invoked,
 * and (for file-backed databases) that the seed stored in the on-disk
 * header is 42, byte-swapped when TDB_CONVERT is set.
 *
 * TAP plan: 4 checks per flag combination, plus 3 more for each of the 4
 * combinations without TDB_INTERNAL.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
union tdb_attribute attr;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* Seed attribute first, with the counting log attribute chained behind it. */
attr.seed.base.attr = TDB_ATTRIBUTE_SEED;
attr.seed.base.next = &log_attr;
attr.seed.seed = 42;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct tdb_header hdr;
int fd;
tdb = tdb_open("run-seed.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &attr);
ok1(tdb);
if (!tdb)
continue;
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(tdb->hash_seed == 42);
/* No log call is expected when the seed is supplied. */
ok1(log_count == 0);
tdb_close(tdb);
/* TDB_INTERNAL runs skip the on-disk header check below. */
if (flags[i] & TDB_INTERNAL)
continue;
/* NOTE(review): fd is passed to read() and close() even when open() failed. */
fd = open("run-seed.tdb", O_RDONLY);
ok1(fd >= 0);
ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr));
/* With TDB_CONVERT the header field is compared after a byte swap. */
if (flags[i] & TDB_CONVERT)
ok1(bswap_64(hdr.hash_seed) == 42);
else
ok1(hdr.hash_seed == 42);
close(fd);
}
return exit_status();
}

View File

@ -0,0 +1,42 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Basic delete test: deleting a missing key must report TDB_ERR_NOEXIST,
 * and deleting a key after inserting it must succeed.  tdb_check runs
 * after each step.
 *
 * TAP plan: 7 checks per flag combination, plus 1 final check of the log
 * message count.
 */
int main(int argc, char *argv[])
{
unsigned int i;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
struct tdb_data key = tdb_mkdata("key", 3);
struct tdb_data data = tdb_mkdata("data", 4);
plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-simple-delete.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (tdb) {
/* Delete should fail. */
ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Insert should succeed. */
ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Delete should now work. */
ok1(tdb_delete(tdb, key) == 0);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
}
}
/* The expected TDB_ERR_NOEXIST above is not expected to be logged. */
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,60 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/summary.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
/*
 * Stores 500 records with 4-byte keys and data lengths cycling through
 * 0..sizeof(j), then checks the text produced by tdb_summary() both
 * without flags and with TDB_SUMMARY_HISTOGRAMS.
 *
 * TAP plan: per flag combination 1 (open) + 2 summaries * 5 checks, plus
 * 1 final check of the log message count.
 */
int main(int argc, char *argv[])
{
unsigned int i, j;
struct tdb_context *tdb;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* Key and data alias j; data.dsize is overridden per record below. */
struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
char *summary;
plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-summary.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
ok1(tdb);
if (!tdb)
continue;
/* Put some stuff in there. */
for (j = 0; j < 500; j++) {
/* Make sure padding varies so we get some graphs! */
data.dsize = j % (sizeof(j) + 1);
/* A failed store is reported via fail() rather than counted in the plan. */
if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
fail("Storing in tdb");
}
/* j is reused as the summary flags value: first 0, then TDB_SUMMARY_HISTOGRAMS. */
for (j = 0;
j <= TDB_SUMMARY_HISTOGRAMS;
j += TDB_SUMMARY_HISTOGRAMS) {
/* NOTE(review): summary is passed to strstr() even if tdb_summary() failed. */
ok1(tdb_summary(tdb, j, &summary) == TDB_SUCCESS);
ok1(strstr(summary, "Number of records: 500\n"));
ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n"));
/* '|' and '*' are expected only in the histogram variant. */
if (j == TDB_SUMMARY_HISTOGRAMS)
ok1(strstr(summary, "|")
&& strstr(summary, "*"));
else
ok1(!strstr(summary, "|")
&& !strstr(summary, "*"));
free(summary);
}
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

View File

@ -0,0 +1,59 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
/*
 * Checks the message tdb_errorstr() returns for every error code from
 * TDB_SUCCESS down to TDB_ERR_RDONLY, and then for the first value below
 * that range, which must yield "Invalid error code".
 *
 * TAP plan: one check per code in the range (-TDB_ERR_RDONLY + 1 values)
 * plus the final out-of-range check.
 */
int main(int argc, char *argv[])
{
enum TDB_ERROR err;
plan_tests(TDB_ERR_RDONLY*-1 + 2);
/* Walk the codes downwards; the loop bound relies on TDB_ERR_RDONLY being the lowest code. */
for (err = TDB_SUCCESS; err >= TDB_ERR_RDONLY; err--) {
/* NOTE(review): no default case; a code missing below would silently skip its planned check. */
switch (err) {
case TDB_SUCCESS:
ok1(!strcmp(tdb_errorstr(err),
"Success"));
break;
case TDB_ERR_IO:
ok1(!strcmp(tdb_errorstr(err),
"IO Error"));
break;
case TDB_ERR_LOCK:
ok1(!strcmp(tdb_errorstr(err),
"Locking error"));
break;
case TDB_ERR_OOM:
ok1(!strcmp(tdb_errorstr(err),
"Out of memory"));
break;
case TDB_ERR_EXISTS:
ok1(!strcmp(tdb_errorstr(err),
"Record exists"));
break;
case TDB_ERR_EINVAL:
ok1(!strcmp(tdb_errorstr(err),
"Invalid parameter"));
break;
case TDB_ERR_NOEXIST:
ok1(!strcmp(tdb_errorstr(err),
"Record does not exist"));
break;
case TDB_ERR_RDONLY:
ok1(!strcmp(tdb_errorstr(err),
"write not permitted"));
break;
case TDB_ERR_CORRUPT:
ok1(!strcmp(tdb_errorstr(err),
"Corrupt database"));
break;
}
}
/* err is now one below TDB_ERR_RDONLY, i.e. outside the range walked above. */
ok1(!strcmp(tdb_errorstr(err), "Invalid error code"));
return exit_status();
}

View File

@ -0,0 +1,211 @@
#include <ccan/tdb2/tdb.c>
#include <ccan/tdb2/open.c>
#include <ccan/tdb2/free.c>
#include <ccan/tdb2/lock.c>
#include <ccan/tdb2/io.c>
#include <ccan/tdb2/hash.c>
#include <ccan/tdb2/check.c>
#include <ccan/tdb2/traverse.c>
#include <ccan/tdb2/transaction.c>
#include <ccan/tap/tap.h>
#include "logging.h"
#define NUM_RECORDS 1000
/*
 * Hash callback that ignores the seed tdb supplies and instead uses the
 * 64-bit seed the private pointer refers to (a seed a failure was seen on).
 */
static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
{
	const unsigned char *bytes = (const unsigned char *)key;
	uint64_t *pinned_seed = (uint64_t *)p;

	return hash64_stable(bytes, len, *pinned_seed);
}
/*
 * Stores NUM_RECORDS records whose key and data are both the record's
 * index.  Returns false at the first store that fails, true otherwise.
 */
static bool store_records(struct tdb_context *tdb)
{
	int i = 0;
	/* Both descriptors point at i, so each store sees the current index. */
	struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
	struct tdb_data data = { (unsigned char *)&i, sizeof(i) };

	while (i < NUM_RECORDS) {
		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
			return false;
		i++;
	}
	return true;
}
/* State shared between main() and the trav() traverse callback. */
struct trav_data {
/* calls: times trav() has run; call_limit: trav() returns 1 once calls equals it. */
unsigned int calls, call_limit;
/* Smallest and largest record value trav() has seen. */
int low, high;
/* Set by trav() when a record's key/data sizes or contents are not as expected. */
bool mismatch;
/* When true, trav() deletes every record it visits. */
bool delete;
/* Result of the most recent tdb_delete() issued by trav(). */
enum TDB_ERROR delete_error;
};
/*
 * Traverse callback: validates one record, tracks the value range seen and
 * optionally deletes the record.
 *
 * Returns -1 on a malformed record or failed delete, 1 once the configured
 * call limit is reached, and 0 otherwise.
 */
static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
		struct trav_data *td)
{
	int val;
	bool well_formed;

	td->calls++;

	/* Key and data must both be one int wide and byte-for-byte equal. */
	well_formed = key.dsize == sizeof(val)
		&& dbuf.dsize == sizeof(val)
		&& memcmp(key.dptr, dbuf.dptr, key.dsize) == 0;
	if (!well_formed) {
		td->mismatch = true;
		return -1;
	}

	memcpy(&val, dbuf.dptr, dbuf.dsize);
	if (td->low > val)
		td->low = val;
	if (td->high < val)
		td->high = val;

	if (td->delete) {
		td->delete_error = tdb_delete(tdb, key);
		if (td->delete_error != TDB_SUCCESS)
			return -1;
	}

	return (td->calls == td->call_limit) ? 1 : 0;
}
/* State shared between main() and the trav_grow() traverse callback. */
struct trav_grow_data {
/* Number of times trav_grow() has run. */
unsigned int calls;
/* Number of visited records whose data was already longer than one int. */
unsigned int num_large;
/* Set by trav_grow() when a record's key/data do not match as expected. */
bool mismatch;
/* Result of the most recent tdb_append() issued by trav_grow(). */
enum TDB_ERROR error;
};
/*
 * Traverse callback that appends 128 zero bytes to every record it visits,
 * counting records that had already been enlarged.
 *
 * Returns -1 on a malformed record or failed append, 0 otherwise.
 */
static int trav_grow(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
		     struct trav_grow_data *tgd)
{
	int val;
	unsigned char padding[128] = { 0 };
	bool well_formed;

	tgd->calls++;

	/* Data must start with a copy of the key; it may be longer than that. */
	well_formed = key.dsize == sizeof(val)
		&& dbuf.dsize >= sizeof(val)
		&& memcmp(key.dptr, dbuf.dptr, key.dsize) == 0;
	if (!well_formed) {
		tgd->mismatch = true;
		return -1;
	}

	/* We must have seen this before! */
	if (dbuf.dsize > sizeof(val))
		tgd->num_large++;

	/* Make a big difference to the database. */
	dbuf.dptr = padding;
	dbuf.dsize = sizeof(padding);
	tgd->error = tdb_append(tdb, key, dbuf);

	return (tgd->error == TDB_SUCCESS) ? 0 : -1;
}
/*
 * Exercises tdb_traverse() on a database hashed with a fixed seed:
 * counting-only traverses, a full traverse, a traverse cut short by the
 * callback, a traverse that deletes every record, and a traverse that
 * grows every record while iterating.
 *
 * TAP plan: 32 checks per flag combination, plus 1 final check of the log
 * message count.
 */
int main(int argc, char *argv[])
{
unsigned int i;
int num;
struct trav_data td;
struct trav_grow_data tgd;
struct tdb_context *tdb;
uint64_t seed = 16014841315512641303ULL;
int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
TDB_NOMMAP|TDB_CONVERT };
/* fixedhash reads the seed through .data. */
union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
.fn = fixedhash,
.data = &seed } };
hattr.base.next = &tap_log_attr;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("run-traverse.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
ok1(tdb);
if (!tdb)
continue;
/* An empty database traverses zero records (no callback supplied). */
ok1(tdb_traverse(tdb, NULL, NULL) == 0);
ok1(store_records(tdb));
num = tdb_traverse(tdb, NULL, NULL);
ok1(num == NUM_RECORDS);
/* Full traverse. */
td.calls = 0;
td.call_limit = UINT_MAX;
td.low = INT_MAX;
td.high = INT_MIN;
td.mismatch = false;
td.delete = false;
num = tdb_traverse(tdb, trav, &td);
ok1(num == NUM_RECORDS);
ok1(!td.mismatch);
ok1(td.calls == NUM_RECORDS);
ok1(td.low == 0);
ok1(td.high == NUM_RECORDS-1);
/* Short traverse. */
/* trav() returns 1 after call_limit calls, which is expected to stop the traverse. */
td.calls = 0;
td.call_limit = NUM_RECORDS / 2;
td.low = INT_MAX;
td.high = INT_MIN;
td.mismatch = false;
td.delete = false;
num = tdb_traverse(tdb, trav, &td);
ok1(num == NUM_RECORDS / 2);
ok1(!td.mismatch);
ok1(td.calls == NUM_RECORDS / 2);
ok1(td.low <= NUM_RECORDS / 2);
ok1(td.high > NUM_RECORDS / 2);
ok1(tdb_check(tdb, NULL, NULL) == 0);
ok1(tap_log_messages == 0);
/* Deleting traverse (delete everything). */
td.calls = 0;
td.call_limit = UINT_MAX;
td.low = INT_MAX;
td.high = INT_MIN;
td.mismatch = false;
td.delete = true;
td.delete_error = TDB_SUCCESS;
num = tdb_traverse(tdb, trav, &td);
ok1(num == NUM_RECORDS);
ok1(td.delete_error == TDB_SUCCESS);
ok1(!td.mismatch);
ok1(td.calls == NUM_RECORDS);
ok1(td.low == 0);
ok1(td.high == NUM_RECORDS - 1);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Now it's empty! */
ok1(tdb_traverse(tdb, NULL, NULL) == 0);
/* Re-add. */
ok1(store_records(tdb));
ok1(tdb_traverse(tdb, NULL, NULL) == NUM_RECORDS);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Grow. This will cause us to be reshuffled. */
tgd.calls = 0;
tgd.num_large = 0;
tgd.mismatch = false;
tgd.error = TDB_SUCCESS;
ok1(tdb_traverse(tdb, trav_grow, &tgd) > 1);
ok1(tgd.error == 0);
ok1(!tgd.mismatch);
ok1(tdb_check(tdb, NULL, NULL) == 0);
/* Some records may be visited again after growing, but not every call may be a repeat. */
ok1(tgd.num_large < tgd.calls);
diag("growing db: %u calls, %u repeats",
tgd.calls, tgd.num_large);
tdb_close(tdb);
}
ok1(tap_log_messages == 0);
return exit_status();
}

16
lib/tdb2/tools/Makefile Normal file
View File

@ -0,0 +1,16 @@
# Objects every tool is linked against.
OBJS:=../../tdb2.o ../../hash.o ../../tally.o
CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
LDFLAGS:=-L../../..

# Neither of these names a real file; declaring them phony keeps a stray
# file called "default" or "clean" from masking the rule.
.PHONY: default clean

default: tdb2torture tdb2tool tdb2dump tdb2restore mktdb2 speed growtdb-bench

# Each tool is built from its single source file by make's built-in rule.
tdb2dump: tdb2dump.c $(OBJS)
tdb2restore: tdb2restore.c $(OBJS)
tdb2torture: tdb2torture.c $(OBJS)
tdb2tool: tdb2tool.c $(OBJS)
mktdb2: mktdb2.c $(OBJS)
speed: speed.c $(OBJS)
growtdb-bench: growtdb-bench.c $(OBJS)

clean:
	rm -f tdb2torture tdb2dump tdb2restore tdb2tool mktdb2 speed growtdb-bench

View File

@ -0,0 +1,112 @@
#include "tdb2.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <err.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/*
 * tdb log callback: writes "tdb:<database name>:<message>" to stderr.
 * The level and private data pointer are not used.
 */
static void logfn(struct tdb_context *tdb,
		  enum tdb_log_level level,
		  const char *message,
		  void *data)
{
	const char *db_name = tdb_name(tdb);

	fprintf(stderr, "tdb:%s:%s\n", db_name, message);
}
/*
 * growtdb-bench <users> <groups>
 *
 * Simulates a growing user/group database in /tmp/growtdb.tdb:
 *  1. In one transaction, inserts <users> 48-byte-keyed records of 64
 *     bytes, appending 51 bytes to a single "User index" record per user.
 *  2. For each of <groups> groups, in its own transaction, inserts a
 *     group record and then appends 32 bytes to every user record and to
 *     the group record, once per user.
 * After each phase the database is checked and this process's
 * /proc/<pid>/statm is printed via system().
 *
 * Any tdb or allocation failure terminates the program with status 1.
 */
int main(int argc, char *argv[])
{
	unsigned int i, j, users, groups;
	TDB_DATA idxkey, idxdata;
	TDB_DATA k, d, gk;
	char cmd[100];
	struct tdb_context *tdb;
	enum TDB_ERROR ecode;
	union tdb_attribute log;

	if (argc != 3) {
		printf("Usage: growtdb-bench <users> <groups>\n");
		exit(1);
	}
	users = atoi(argv[1]);
	groups = atoi(argv[2]);

	snprintf(cmd, sizeof(cmd), "cat /proc/%i/statm", (int)getpid());

	log.base.attr = TDB_ATTRIBUTE_LOG;
	log.base.next = NULL;
	log.log.fn = logfn;

	tdb = tdb_open("/tmp/growtdb.tdb", TDB_DEFAULT,
		       O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
	if (!tdb)
		err(1, "Opening /tmp/growtdb.tdb");

	idxkey.dptr = (unsigned char *)"User index";
	idxkey.dsize = strlen("User index");
	idxdata.dsize = 51;
	idxdata.dptr = calloc(idxdata.dsize, 1);

	/* User key/data buffers; the group key differs only in its last byte. */
	k.dsize = 48;
	k.dptr = calloc(k.dsize, 1);
	d.dsize = 64;
	d.dptr = calloc(d.dsize, 1);
	gk.dsize = 48;
	gk.dptr = calloc(gk.dsize, 1);
	if (!idxdata.dptr || !k.dptr || !d.dptr || !gk.dptr)
		errx(1, "Out of memory allocating record buffers");
	gk.dptr[gk.dsize-1] = 1;

	/* Create users. */
	ecode = tdb_transaction_start(tdb);
	if (ecode != TDB_SUCCESS)
		errx(1, "tdb transaction start failed: %s",
		     tdb_errorstr(ecode));
	for (i = 0; i < users; i++) {
		memcpy(k.dptr, &i, sizeof(i));
		ecode = tdb_store(tdb, k, d, TDB_INSERT);
		if (ecode != TDB_SUCCESS)
			errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));

		/* This simulates a growing index record. */
		ecode = tdb_append(tdb, idxkey, idxdata);
		if (ecode != TDB_SUCCESS)
			errx(1, "tdb append failed: %s", tdb_errorstr(ecode));
	}
	if ((ecode = tdb_transaction_commit(tdb)) != 0)
		errx(1, "tdb commit1 failed: %s", tdb_errorstr(ecode));

	if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
		errx(1, "tdb_check failed after initial insert!");

	/* Purely informational; a failure to run it is not fatal. */
	system(cmd);

	/* Now put them all in groups: add 32 bytes to each record for
	 * a group. */
	d.dsize = 32;
	for (i = 0; i < groups; i++) {
		ecode = tdb_transaction_start(tdb);
		if (ecode != TDB_SUCCESS)
			errx(1, "tdb transaction start failed: %s",
			     tdb_errorstr(ecode));

		/* Create the "group". */
		memcpy(gk.dptr, &i, sizeof(i));
		ecode = tdb_store(tdb, gk, d, TDB_INSERT);
		if (ecode != TDB_SUCCESS)
			errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));

		/* Now populate it. */
		for (j = 0; j < users; j++) {
			/* Append to the user. */
			memcpy(k.dptr, &j, sizeof(j));
			if ((ecode = tdb_append(tdb, k, d)) != 0)
				errx(1, "tdb append failed: %s",
				     tdb_errorstr(ecode));

			/* Append to the group. */
			if ((ecode = tdb_append(tdb, gk, d)) != 0)
				errx(1, "tdb append failed: %s",
				     tdb_errorstr(ecode));
		}
		if ((ecode = tdb_transaction_commit(tdb)) != 0)
			errx(1, "tdb commit2 failed: %s", tdb_errorstr(ecode));
		if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
			errx(1, "tdb_check failed after iteration %u!", i);
		system(cmd);
	}

	tdb_close(tdb);
	free(gk.dptr);
	free(d.dptr);
	free(k.dptr);
	free(idxdata.dptr);
	return 0;
}

29
lib/tdb2/tools/mktdb2.c Normal file
View File

@ -0,0 +1,29 @@
#include "tdb2.h"
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <err.h>
int main(int argc, char *argv[])
{
unsigned int i, num_recs;
struct tdb_context *tdb;
if (argc != 3 || (num_recs = atoi(argv[2])) == 0)
errx(1, "Usage: mktdb <tdbfile> <numrecords>");
tdb = tdb_open(argv[1], TDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
if (!tdb)
err(1, "Opening %s", argv[1]);
for (i = 0; i < num_recs; i++) {
TDB_DATA d;
d.dptr = (void *)&i;
d.dsize = sizeof(i);
if (tdb_store(tdb, d, d, TDB_INSERT) != 0)
err(1, "Failed to store record %i", i);
}
printf("Done\n");
return 0;
}

440
lib/tdb2/tools/speed.c Normal file
View File

@ -0,0 +1,440 @@
/* Simple speed test for TDB */
#include <err.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "tdb2.h"
/*
 * Nanoseconds per operation.
 *
 * @start, @stop: wall-clock times bracketing the work.
 * @num: number of operations performed in that interval.
 *
 * Returns the elapsed time divided by @num, in nanoseconds.  Returns 0
 * when @num is 0 or when @stop is earlier than @start, rather than
 * dividing by zero or converting a negative value to size_t.
 */
static size_t normalize(const struct timeval *start,
			const struct timeval *stop,
			unsigned int num)
{
	double elapsed_usec;

	if (num == 0)
		return 0;

	/* Do the whole sum in floating point so the microsecond total cannot
	 * overflow a narrow integer type before it is converted. */
	elapsed_usec = ((double)stop->tv_sec - (double)start->tv_sec) * 1000000.0
		+ ((double)stop->tv_usec - (double)start->tv_usec);
	if (elapsed_usec < 0)
		return 0;

	return (size_t)(elapsed_usec / num * 1000);
}
/*
 * Size in bytes of /tmp/speed.tdb, or (size_t)-1 when it cannot be
 * stat()ed.
 */
static size_t file_size(void)
{
	struct stat info;
	int rc = stat("/tmp/speed.tdb", &info);

	if (rc == 0)
		return info.st_size;
	return -1;
}
/*
 * Traverse callback: adds the int at the start of each record's data to
 * the running total that @p points to.  Always returns 0.
 */
static int count_record(struct tdb_context *tdb,
			TDB_DATA key, TDB_DATA data, void *p)
{
	int *running_total = p;
	int record_value = *(int *)data.dptr;

	*running_total += record_value;
	return 0;
}
/*
 * Prints every counter from the database's TDB_ATTRIBUTE_STATS attribute
 * to stdout, then closes the database and reopens /tmp/speed.tdb with the
 * same @flags and @attr so the next stage gets a fresh handle.
 *
 * @tdb: in/out; replaced by the reopened handle.
 *
 * Exits with status 1 if the stats cannot be read or the reopen fails
 * (the caller would otherwise carry on with a NULL handle).
 */
#define SPEED_PRINT_STAT(prefix, field)					\
	printf(prefix #field " = %llu\n",				\
	       (unsigned long long)stats.stats.field)

static void dump_and_clear_stats(struct tdb_context **tdb,
				 int flags,
				 union tdb_attribute *attr)
{
	union tdb_attribute stats;
	enum TDB_ERROR ecode;

	stats.base.attr = TDB_ATTRIBUTE_STATS;
	stats.stats.size = sizeof(stats.stats);
	ecode = tdb_get_attribute(*tdb, &stats);
	if (ecode != TDB_SUCCESS)
		errx(1, "Getting stats: %s", tdb_errorstr(ecode));

	/* Each line is "<prefix><field name> = <value>". */
	SPEED_PRINT_STAT("", allocs);
	SPEED_PRINT_STAT(" ", alloc_subhash);
	SPEED_PRINT_STAT(" ", alloc_chain);
	SPEED_PRINT_STAT(" ", alloc_bucket_exact);
	SPEED_PRINT_STAT(" ", alloc_bucket_max);
	SPEED_PRINT_STAT(" ", alloc_leftover);
	SPEED_PRINT_STAT(" ", alloc_coalesce_tried);
	SPEED_PRINT_STAT(" ", alloc_coalesce_iterate_clash);
	SPEED_PRINT_STAT(" ", alloc_coalesce_lockfail);
	SPEED_PRINT_STAT(" ", alloc_coalesce_race);
	SPEED_PRINT_STAT(" ", alloc_coalesce_succeeded);
	SPEED_PRINT_STAT(" ", alloc_coalesce_num_merged);
	SPEED_PRINT_STAT("", compares);
	SPEED_PRINT_STAT(" ", compare_wrong_bucket);
	SPEED_PRINT_STAT(" ", compare_wrong_offsetbits);
	SPEED_PRINT_STAT(" ", compare_wrong_keylen);
	SPEED_PRINT_STAT(" ", compare_wrong_rechash);
	SPEED_PRINT_STAT(" ", compare_wrong_keycmp);
	SPEED_PRINT_STAT("", transactions);
	SPEED_PRINT_STAT(" ", transaction_cancel);
	SPEED_PRINT_STAT(" ", transaction_nest);
	SPEED_PRINT_STAT(" ", transaction_expand_file);
	SPEED_PRINT_STAT(" ", transaction_read_direct);
	SPEED_PRINT_STAT(" ", transaction_read_direct_fail);
	SPEED_PRINT_STAT(" ", transaction_write_direct);
	SPEED_PRINT_STAT(" ", transaction_write_direct_fail);
	SPEED_PRINT_STAT("", expands);
	SPEED_PRINT_STAT("", frees);
	SPEED_PRINT_STAT("", locks);
	SPEED_PRINT_STAT(" ", lock_lowlevel);
	SPEED_PRINT_STAT(" ", lock_nonblock);
	SPEED_PRINT_STAT(" ", lock_nonblock_fail);

	/* Now clear. */
	tdb_close(*tdb);
	*tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR, 0, attr);
	if (!*tdb)
		err(1, "Reopening /tmp/speed.tdb");
}

#undef SPEED_PRINT_STAT
/*
 * tdb log callback: writes the message followed by a newline to stderr.
 * The database handle, level and private data pointer are not used.
 */
static void tdb_log(struct tdb_context *tdb, enum tdb_log_level level,
		    const char *message, void *data)
{
	fprintf(stderr, "%s\n", message);
}
/* Opens a transaction for the coming stage when --transaction was given. */
static void stage_begin(struct tdb_context *tdb, bool transaction)
{
	enum TDB_ERROR ecode;

	if (!transaction)
		return;
	ecode = tdb_transaction_start(tdb);
	if (ecode != TDB_SUCCESS)
		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
}

/* Commits the stage's transaction when --transaction was given. */
static void stage_commit(struct tdb_context *tdb, bool transaction)
{
	enum TDB_ERROR ecode;

	if (!transaction)
		return;
	ecode = tdb_transaction_commit(tdb);
	if (ecode != TDB_SUCCESS)
		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
}

/* Prints the stage's per-operation time and file size, verifies the
 * database, and prints the histogram summary when --summary was given. */
static void stage_report(struct tdb_context *tdb,
			 const struct timeval *start,
			 const struct timeval *stop,
			 unsigned int num, bool summary)
{
	printf(" %zu ns (%zu bytes)\n",
	       normalize(start, stop, num), file_size());

	if (tdb_check(tdb, NULL, NULL))
		errx(1, "tdb_check failed!");

	if (summary) {
		char *sumstr = NULL;
		enum TDB_ERROR ecode;

		/* Never hand a NULL string to printf if the summary failed. */
		ecode = tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
		if (ecode != TDB_SUCCESS)
			errx(1, "tdb_summary failed: %s", tdb_errorstr(ecode));
		printf("%s\n", sumstr);
		free(sumstr);
	}
}

/*
 * speed [--internal] [--transaction] [--no-sync] [--summary] [--stats]
 *       [<num> [<stopat>]]
 *
 * Times eight stages against /tmp/speed.tdb, each over <num> records
 * (default 1000): add, find, miss, traverse, delete, re-add, append and
 * churn.  Options are recognised only in the order listed above.  The
 * program exits after stage number <stopat> when that is given.
 *
 * After every stage the database is checked; --summary prints a histogram
 * summary and --stats dumps and resets the statistics counters.
 */
int main(int argc, char *argv[])
{
	unsigned int i, j, num = 1000, stage = 0, stopat = -1;
	unsigned int expected_tally;
	int flags = TDB_DEFAULT;
	bool transaction = false, summary = false;
	TDB_DATA key, data;
	struct tdb_context *tdb;
	struct timeval start, stop;
	union tdb_attribute seed, log;
	bool do_stats = false;
	enum TDB_ERROR ecode;

	/* Try to keep benchmarks even. */
	seed.base.attr = TDB_ATTRIBUTE_SEED;
	seed.base.next = NULL;
	seed.seed.seed = 0;

	log.base.attr = TDB_ATTRIBUTE_LOG;
	log.base.next = &seed;
	log.log.fn = tdb_log;

	if (argv[1] && strcmp(argv[1], "--internal") == 0) {
		flags = TDB_INTERNAL;
		argc--;
		argv++;
	}
	if (argv[1] && strcmp(argv[1], "--transaction") == 0) {
		transaction = true;
		argc--;
		argv++;
	}
	if (argv[1] && strcmp(argv[1], "--no-sync") == 0) {
		flags |= TDB_NOSYNC;
		argc--;
		argv++;
	}
	if (argv[1] && strcmp(argv[1], "--summary") == 0) {
		summary = true;
		argc--;
		argv++;
	}
	if (argv[1] && strcmp(argv[1], "--stats") == 0) {
		do_stats = true;
		argc--;
		argv++;
	}

	tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR|O_CREAT|O_TRUNC,
		       0600, &log);
	if (!tdb)
		err(1, "Opening /tmp/speed.tdb");

	/* Key and data both alias i: record i holds the value i. */
	key.dptr = (void *)&i;
	key.dsize = sizeof(i);
	data = key;

	if (argv[1]) {
		num = atoi(argv[1]);
		argv++;
		argc--;
	}

	if (argv[1]) {
		stopat = atoi(argv[1]);
		argv++;
		argc--;
	}

	/* Add num records. */
	printf("Adding %u records: ", num); fflush(stdout);
	stage_begin(tdb, transaction);
	gettimeofday(&start, NULL);
	for (i = 0; i < num; i++) {
		ecode = tdb_store(tdb, key, data, TDB_INSERT);
		if (ecode != TDB_SUCCESS)
			errx(1, "Inserting key %u in tdb: %s",
			     i, tdb_errorstr(ecode));
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	/* Find num records. */
	printf("Finding %u records: ", num); fflush(stdout);
	stage_begin(tdb, transaction);
	gettimeofday(&start, NULL);
	for (i = 0; i < num; i++) {
		struct tdb_data dbuf;
		unsigned int fetched;

		ecode = tdb_fetch(tdb, key, &dbuf);
		if (ecode != TDB_SUCCESS)
			errx(1, "Fetching key %u in tdb gave %u",
			     i, (unsigned int)ecode);
		memcpy(&fetched, dbuf.dptr, sizeof(fetched));
		/* The buffer is released here so the loop does not leak it. */
		free(dbuf.dptr);
		if (fetched != i)
			errx(1, "Fetching key %u in tdb gave %u",
			     i, fetched);
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	/* Look up num records that do not exist. */
	printf("Missing %u records: ", num); fflush(stdout);
	stage_begin(tdb, transaction);
	gettimeofday(&start, NULL);
	for (i = num; i < num*2; i++) {
		struct tdb_data dbuf;

		ecode = tdb_fetch(tdb, key, &dbuf);
		if (ecode != TDB_ERR_NOEXIST)
			errx(1, "Fetching key %u in tdb gave %s",
			     i, tdb_errorstr(ecode));
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	/* Traverse num records. */
	printf("Traversing %u records: ", num); fflush(stdout);
	stage_begin(tdb, transaction);
	i = 0;
	gettimeofday(&start, NULL);
	if (tdb_traverse(tdb, count_record, &i) != num)
		errx(1, "Traverse returned wrong number of records");
	/* Sum of 0..num-1, computed wide so odd counts are handled correctly,
	 * then truncated to the width of the tally. */
	expected_tally = (unsigned int)((unsigned long long)num * (num - 1) / 2);
	if (i != expected_tally)
		errx(1, "Traverse tallied to %u", i);
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	/* Delete num records (not in order). */
	printf("Deleting %u records: ", num); fflush(stdout);
	stage_begin(tdb, transaction);
	gettimeofday(&start, NULL);
	for (j = 0; j < num; j++) {
		i = (j + 100003) % num;
		if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
			errx(1, "Deleting key %u in tdb: %s",
			     i, tdb_errorstr(ecode));
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	/* Re-add num records (not in order). */
	printf("Re-adding %u records: ", num); fflush(stdout);
	stage_begin(tdb, transaction);
	gettimeofday(&start, NULL);
	for (j = 0; j < num; j++) {
		i = (j + 100003) % num;
		if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
			errx(1, "Inserting key %u in tdb: %s",
			     i, tdb_errorstr(ecode));
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	/* Append to num records. */
	stage_begin(tdb, transaction);
	printf("Appending %u records: ", num); fflush(stdout);
	gettimeofday(&start, NULL);
	for (i = 0; i < num; i++) {
		ecode = tdb_append(tdb, key, data);
		if (ecode != TDB_SUCCESS)
			errx(1, "Appending key %u in tdb: %s",
			     i, tdb_errorstr(ecode));
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	/* NOTE(review): unlike every other stage, no stats dump happens here;
	 * kept as found -- confirm whether that is intentional. */
	if (++stage == stopat)
		exit(0);

	/* Churn num records: not in order! */
	stage_begin(tdb, transaction);
	printf("Churning %u records: ", num); fflush(stdout);
	gettimeofday(&start, NULL);
	for (j = 0; j < num; j++) {
		i = (j + 1000019) % num;
		if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
			errx(1, "Deleting key %u in tdb: %s",
			     i, tdb_errorstr(ecode));
		i += num;
		if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
			errx(1, "Inserting key %u in tdb: %s",
			     i, tdb_errorstr(ecode));
	}
	gettimeofday(&stop, NULL);
	stage_commit(tdb, transaction);
	stage_report(tdb, &start, &stop, num, summary);
	if (do_stats)
		dump_and_clear_stats(&tdb, flags, &log);
	if (++stage == stopat)
		exit(0);

	tdb_close(tdb);
	return 0;
}

115
lib/tdb2/tools/tdb2dump.c Normal file
View File

@ -0,0 +1,115 @@
/*
simple tdb2 dump util
Copyright (C) Andrew Tridgell 2001
Copyright (C) Rusty Russell 2011
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb2.h"
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Write a record's bytes to stdout in dump format: printable characters are
 * emitted verbatim, while double quotes, backslashes and non-printable bytes
 * are emitted as a backslash followed by two upper-case hex digits.
 */
static void print_data(TDB_DATA d)
{
	const unsigned char *p = (const unsigned char *)d.dptr;
	size_t i;

	/*
	 * Count with size_t: the previous code copied dsize into an int,
	 * which truncates (or goes negative) for very large records.
	 */
	for (i = 0; i < d.dsize; i++) {
		if (isprint(p[i]) && p[i] != '"' && p[i] != '\\') {
			fputc(p[i], stdout);
		} else {
			printf("\\%02X", p[i]);
		}
	}
}
/*
 * Traversal callback: print one record as a brace-delimited block holding
 * a key line and a data line.  Always returns 0 so the walk continues.
 */
static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
{
	fputs("{\n", stdout);

	/* key(<length>) = "<escaped bytes>" */
	printf("key(%d) = \"", (int)key.dsize);
	print_data(key);
	fputs("\"\n", stdout);

	/* data(<length>) = "<escaped bytes>" */
	printf("data(%d) = \"", (int)dbuf.dsize);
	print_data(dbuf);
	fputs("\"\n", stdout);

	fputs("}\n", stdout);
	return 0;
}
/*
 * Open @fname read-only and dump it to stdout.
 *
 * With @keyname == NULL every record is printed through traverse_fn();
 * otherwise only the value stored under @keyname is printed.
 *
 * Returns 0 on success, 1 if the database cannot be opened or the
 * requested key cannot be fetched.
 */
static int dump_tdb(const char *fname, const char *keyname)
{
	struct tdb_context *tdb;
	TDB_DATA key, value;
	int ret = 0;

	tdb = tdb_open(fname, 0, O_RDONLY, 0, NULL);
	if (!tdb) {
		printf("Failed to open %s\n", fname);
		return 1;
	}

	if (!keyname) {
		tdb_traverse(tdb, traverse_fn, NULL);
	} else {
		key = tdb_mkdata(keyname, strlen(keyname));
		if (tdb_fetch(tdb, key, &value) != 0) {
			ret = 1;
		} else {
			print_data(value);
			/* The fetched buffer belongs to us. */
			free(value.dptr);
		}
	}

	/* Release the handle on every path, including a failed fetch. */
	tdb_close(tdb);
	return ret;
}
/* Print the command-line synopsis and option list to stdout. */
static void usage( void)
{
	fputs("Usage: tdb2dump [options] <filename>\n\n"
	      " -h this help message\n"
	      " -k keyname dumps value of keyname\n",
	      stdout);
}
/*
 * Entry point: parse -h / -k KEYNAME, then dump the database named by the
 * first non-option argument.  Exits with status 1 on usage errors.
 */
int main(int argc, char *argv[])
{
	char *fname, *keyname=NULL;
	int c;

	if (argc < 2) {
		printf("Usage: tdb2dump <fname>\n");
		exit(1);
	}

	while ((c = getopt( argc, argv, "hk:")) != -1) {
		switch (c) {
		case 'h':
			usage();
			exit( 0);
		case 'k':
			keyname = optarg;
			break;
		default:
			usage();
			exit( 1);
		}
	}

	/*
	 * getopt() may have consumed every argument (e.g. "-k foo" with no
	 * file name); argv[optind] would then be NULL.
	 */
	if (optind >= argc) {
		usage();
		exit( 1);
	}

	fname = argv[optind];

	return dump_tdb(fname, keyname);
}

View File

@ -0,0 +1,227 @@
/*
tdb2restore -- construct a tdb from tdbdump output.
Copyright (C) Volker Lendecke 2010
Copyright (C) Simon McVittie 2005
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb2.h"
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#define debug_fprintf(file, fmt, ...) do {/*nothing*/} while (0)
/*
 * Parse the head of a dump line of the form
 *
 *     <anything>(<length>) = "
 *
 * from @f, leaving the stream positioned just after the opening quote.
 *
 * Returns the declared payload length, or -1 on EOF, on a header that is
 * too long or malformed, or on a negative length.
 */
static int read_linehead(FILE *f)
{
	size_t i;
	int c;
	int num_bytes;
	char prefix[128];

	/* Skip everything up to and including the opening parenthesis. */
	do {
		c = getc(f);
		if (c == EOF) {
			return -1;
		}
	} while (c != '(');

	/* Collect the text between '(' and the opening quote. */
	for (i = 0; i < sizeof(prefix); i++) {
		c = getc(f);
		if (c == EOF) {
			return -1;
		}
		if (c == '"') {
			break;
		}
		prefix[i] = c;
	}
	if (i == sizeof(prefix)) {
		/* No quote within the buffer: not a valid header. */
		return -1;
	}
	prefix[i] = '\0';

	if (sscanf(prefix, "%d) = ", &num_bytes) != 1) {
		return -1;
	}

	/*
	 * A negative length is never valid, and callers convert the result
	 * to size_t, where it would become an enormous allocation request.
	 */
	if (num_bytes < 0) {
		return -1;
	}
	return num_bytes;
}
/*
 * Read one hexadecimal digit from @f and return its value (0-15).
 *
 * Returns -1, after printing a diagnostic to stderr, on EOF, on a double
 * quote, or on any other non-hex character.
 */
static int read_hex(FILE *f)
{
	/* Read from the caller's stream, not unconditionally from stdin. */
	int c = getc(f);

	if (c == EOF) {
		fprintf(stderr, "Unexpected EOF in data\n");
		return -1;
	}
	if (c == '"') {
		fprintf(stderr, "Unexpected \\\" sequence\n");
		return -1;
	}
	if ('0' <= c && c <= '9') {
		return c - '0';
	}
	if ('A' <= c && c <= 'F') {
		return c - 'A' + 10;
	}
	if ('a' <= c && c <= 'f') {
		return c - 'a' + 10;
	}
	fprintf(stderr, "Invalid hex: %c\n", c);
	return -1;
}

/*
 * Read @size payload bytes from @f into a freshly allocated buffer stored
 * in @d.  A backslash introduces a two-digit hex escape; every other
 * character is taken literally.
 *
 * Returns 0 on success and -1 on allocation failure, premature EOF, a
 * closing quote before @size bytes were read, or a bad escape.  On failure
 * d->dptr may still point at the allocation; the caller frees it.
 */
static int read_data(FILE *f, struct tdb_data *d, size_t size)
{
	size_t i;

	/* malloc(0) is allowed to return NULL, so never request 0 bytes. */
	d->dptr = malloc(size ? size : 1);
	if (d->dptr == NULL) {
		return -1;
	}
	d->dsize = size;

	for (i = 0; i < size; i++) {
		int c = getc(f);

		if (c == EOF) {
			fprintf(stderr, "Unexpected EOF in data\n");
			return -1;
		}
		if (c == '"') {
			/*
			 * The payload ended before the declared length:
			 * the remaining bytes would be uninitialised.
			 */
			fprintf(stderr, "Data shorter than declared length\n");
			return -1;
		}
		if (c == '\\') {
			int high, low;

			high = read_hex(f);
			if (high < 0) {
				return -1;
			}
			low = read_hex(f);
			if (low < 0) {
				return -1;
			}
			d->dptr[i] = (unsigned char)((high << 4) | low);
		} else {
			d->dptr[i] = c;
		}
	}
	return 0;
}
/*
 * Consume one line from @f and require it to equal @s exactly (including
 * the newline).  Returns 0 on a match and -1 otherwise.  When nothing
 * could be read and @eof is non-NULL, *eof is set to 1.
 */
static int swallow(FILE *f, const char *s, int *eof)
{
	char buf[128];

	if (!fgets(buf, sizeof(buf), f)) {
		if (eof) {
			*eof = 1;
		}
		return -1;
	}
	return strcmp(buf, s) == 0 ? 0 : -1;
}
/*
 * Read one "{ key(...) = "..." data(...) = "..." }" record from @f and
 * insert it into @tdb.
 *
 * Returns true when the record was stored.  Returns false on any parse or
 * store error; *eof is set to 1 when the failure was end of input while
 * looking for the opening brace line.
 */
static bool read_rec(FILE *f, struct tdb_context *tdb, int *eof)
{
	int length;
	struct tdb_data key, data;
	bool ret = false;
	enum TDB_ERROR e;

	key.dptr = NULL;
	data.dptr = NULL;

	if (swallow(f, "{\n", eof) == -1) {
		goto fail;
	}

	length = read_linehead(f);
	if (length < 0) {
		goto fail;
	}
	/* Any non-zero result is a failure, not only -1. */
	if (read_data(f, &key, length) != 0) {
		goto fail;
	}
	if (swallow(f, "\"\n", NULL) == -1) {
		goto fail;
	}

	length = read_linehead(f);
	if (length < 0) {
		goto fail;
	}
	if (read_data(f, &data, length) != 0) {
		goto fail;
	}
	if ((swallow(f, "\"\n", NULL) == -1)
	    || (swallow(f, "}\n", NULL) == -1)) {
		goto fail;
	}

	e = tdb_store(tdb, key, data, TDB_INSERT);
	if (e != TDB_SUCCESS) {
		fprintf(stderr, "TDB error: %s\n", tdb_errorstr(e));
		goto fail;
	}

	ret = true;
fail:
	/* Reached on success too: both buffers are always released here. */
	free(key.dptr);
	free(data.dptr);
	return ret;
}
/*
 * Create the database @fname (it must not already exist) and fill it with
 * the records read from stdin.
 *
 * Returns 0 on success and 1 if the database cannot be created, a record
 * cannot be parsed or stored, or closing the database fails.
 */
static int restore_tdb(const char *fname)
{
	struct tdb_context *tdb;

	tdb = tdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, NULL);
	if (!tdb) {
		perror("tdb_open");
		fprintf(stderr, "Failed to open %s\n", fname);
		return 1;
	}

	while (1) {
		int eof = 0;
		if (!read_rec(stdin, tdb, &eof)) {
			if (eof) {
				break;
			}
			/* Do not leave the handle open on a bad record. */
			tdb_close(tdb);
			return 1;
		}
	}

	if (tdb_close(tdb)) {
		fprintf(stderr, "Error closing tdb\n");
		return 1;
	}
	fprintf(stderr, "EOF\n");
	return 0;
}
/*
 * Entry point: restore stdin into the database named by the first
 * argument; print a usage line and exit with status 1 when it is missing.
 */
int main(int argc, char *argv[])
{
	if (argc >= 2) {
		return restore_tdb(argv[1]);
	}

	printf("Usage: %s dbname < tdbdump_output\n", argv[0]);
	exit(1);
}

798
lib/tdb2/tools/tdb2tool.c Normal file
View File

@ -0,0 +1,798 @@
/*
Unix SMB/CIFS implementation.
Samba database functions
Copyright (C) Andrew Tridgell 1999-2000
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000
Copyright (C) Andrew Esh 2001
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb2.h"
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <stdarg.h>
/* Forward declaration: the dispatcher is defined after the helpers it calls. */
static int do_command(void);
/* Command to dispatch; main() points it at the typed input line or at argv[2]. */
const char *cmdname;
/* First and second command arguments, or NULL when absent. */
char *arg1, *arg2;
/* Byte lengths of arg1/arg2 as reported by convert_string(). */
size_t arg1len, arg2len;
/* Non-zero once "first" has been run; "next" is ignored while it is zero. */
int bIterate = 0;
/* Not referenced in the visible part of this file. */
char *line;
/* Key handed to tdb_firstkey()/tdb_nextkey() by the first/next commands. */
TDB_DATA iterate_kbuf;
/* Not referenced in the visible part of this file. */
char cmdline[1024];
/* When non-zero, create/open pass TDB_NOMMAP; flipped by the "mmap" command. */
static int disable_mmap;
/*
 * Identifiers for every command do_command() can dispatch.
 * CMD_HELP doubles as the fallback when no table entry matches.
 */
enum commands {
CMD_CREATE_TDB,
CMD_OPEN_TDB,
CMD_TRANSACTION_START,
CMD_TRANSACTION_COMMIT,
CMD_TRANSACTION_CANCEL,
CMD_ERASE,
CMD_DUMP,
CMD_INSERT,
CMD_MOVE,
CMD_STORE,
CMD_SHOW,
CMD_KEYS,
CMD_HEXKEYS,
CMD_DELETE,
/* Compiled out: no handlers exist for these two. */
#if 0
CMD_LIST_HASH_FREE,
CMD_LIST_FREE,
#endif
CMD_INFO,
CMD_MMAP,
CMD_SPEED,
CMD_FIRST,
CMD_NEXT,
CMD_SYSTEM,
CMD_CHECK,
CMD_QUIT,
CMD_HELP
};
/* One entry of the command lookup table: typed name -> command id. */
typedef struct {
const char *name;
enum commands cmd;
} COMMAND_TABLE;
/*
 * Command lookup table, terminated by the NULL-named entry.
 *
 * do_command() scans it in order and takes the first entry whose name is a
 * prefix of the input, so entry order matters and several names may map to
 * the same command ("first"/"1", "next"/"n", "quit"/"q").
 */
COMMAND_TABLE cmd_table[] = {
{"create", CMD_CREATE_TDB},
{"open", CMD_OPEN_TDB},
/* Transaction commands are compiled out of the table. */
#if 0
{"transaction_start", CMD_TRANSACTION_START},
{"transaction_commit", CMD_TRANSACTION_COMMIT},
{"transaction_cancel", CMD_TRANSACTION_CANCEL},
#endif
{"erase", CMD_ERASE},
{"dump", CMD_DUMP},
{"insert", CMD_INSERT},
{"move", CMD_MOVE},
{"store", CMD_STORE},
{"show", CMD_SHOW},
{"keys", CMD_KEYS},
{"hexkeys", CMD_HEXKEYS},
{"delete", CMD_DELETE},
#if 0
{"list", CMD_LIST_HASH_FREE},
{"free", CMD_LIST_FREE},
#endif
{"info", CMD_INFO},
{"speed", CMD_SPEED},
{"mmap", CMD_MMAP},
{"first", CMD_FIRST},
{"1", CMD_FIRST},
{"next", CMD_NEXT},
{"n", CMD_NEXT},
{"check", CMD_CHECK},
{"quit", CMD_QUIT},
{"q", CMD_QUIT},
{"!", CMD_SYSTEM},
/* Sentinel: ends the scan; its command id is not read by do_command(). */
{NULL, CMD_HELP}
};
/* Start (tp1) and end (tp2) timestamps shared by the timer helpers. */
struct timeval tp1,tp2;
/* Record the current wall-clock time in tp1 as the start of a measurement. */
static void _start_timer(void)
{
gettimeofday(&tp1,NULL);
}
/*
 * Sample the current time into tp2 and return the seconds elapsed since
 * the time stored in tp1, with microsecond resolution.
 */
static double _end_timer(void)
{
	double whole_secs, micro_secs;

	gettimeofday(&tp2,NULL);
	whole_secs = tp2.tv_sec - tp1.tv_sec;
	micro_secs = tp2.tv_usec - tp1.tv_usec;
	return whole_secs + micro_secs*1.0e-6;
}
/*
 * Log callback installed through TDB_ATTRIBUTE_LOG: writes the message to
 * stderr as-is.  The tdb handle, level and private pointer are ignored.
 */
static void tdb_log(struct tdb_context *tdb, enum tdb_log_level level,
const char *message, void *priv)
{
fputs(message, stderr);
}
/* a tdb tool for manipulating a tdb database */
/* The currently open database, or NULL when none is open. */
static struct tdb_context *tdb;
/* Forward declarations of the record printers, defined further down. */
static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state);
/*
 * Print @len bytes of @buf to stdout as text, replacing non-printable
 * bytes with '.'.  A single trailing NUL byte is not displayed.
 * Nothing is printed when @len is zero or negative.
 */
static void print_asc(const char *buf,int len)
{
	int i;

	/* Guard first: buf[len - 1] is out of bounds for an empty buffer. */
	if (len <= 0)
		return;

	/* We're probably printing ASCII strings so don't try to display
	   the trailing NULL character. */
	if (buf[len - 1] == 0)
		len--;

	for (i = 0; i < len; i++) {
		/* isprint() is only defined for unsigned char values and EOF. */
		unsigned char ch = (unsigned char)buf[i];

		putchar(isprint(ch) ? ch : '.');
	}
}
/*
 * Hex-dump @len bytes of @buf to stdout, 16 bytes per row.  Each row
 * starts with the offset in brackets, followed by the bytes in hex and the
 * same bytes as text (via print_asc) in two groups of up to eight.
 * Nothing is printed when @len is zero or negative.
 */
static void print_data(const char *buf,int len)
{
int i=0;
if (len<=0) return;
printf("[%03X] ",i);
for (i=0;i<len;) {
printf("%02X ",(int)((unsigned char)buf[i]));
i++;
/* Extra gap after every eighth byte. */
if (i%8 == 0) printf(" ");
/* Row complete: append the text columns and start the next row. */
if (i%16 == 0) {
print_asc(&buf[i-16],8); printf(" ");
print_asc(&buf[i-8],8); printf("\n");
if (i<len) printf("[%03X] ",i);
}
}
/* Final partial row: pad the hex area, then print the remaining text. */
if (i%16) {
int n;
/* n = number of byte slots missing from this row. */
n = 16 - (i%16);
printf(" ");
if (n>8) printf(" ");
while (n--) printf(" ");
/* First text group: up to eight of the leftover bytes. */
n = i%16;
if (n > 8) n = 8;
print_asc(&buf[i-(i%16)],n); printf(" ");
/* Second text group: whatever is left beyond the first eight. */
n = (i%16) - n;
if (n>0) print_asc(&buf[i-n],n);
printf("\n");
}
}
/*
 * Print the list of supported commands to stdout.
 *
 * The list mirrors cmd_table: commands that are compiled out of the table
 * are compiled out here as well, so the help never advertises a command
 * the dispatcher would not recognise.
 */
static void help(void)
{
	printf("\n"
"tdbtool: \n"
" create dbname : create a database\n"
" open dbname : open an existing database\n"
#if 0
" transaction_start : start a transaction\n"
" transaction_commit : commit a transaction\n"
" transaction_cancel : cancel a transaction\n"
#endif
" erase : erase the database\n"
" dump : dump the database as strings\n"
" keys : dump the database keys as strings\n"
" hexkeys : dump the database keys as hex values\n"
" info : print summary info about the database\n"
" insert key data : insert a record\n"
" move key file : move a record to a destination tdb\n"
" store key data : store a record (replace)\n"
" show key : show a record by key\n"
" delete key : delete a record by key\n"
#if 0
" list : print the database hash table and freelist\n"
" free : print the database freelist\n"
#endif
" check : check the integrity of an opened database\n"
" speed : perform speed tests on the database\n"
" mmap : toggle mmap use for subsequent create/open\n"
" ! command : execute system command\n"
" 1 | first : print the first record\n"
" n | next : print the next record\n"
" q | quit : terminate\n"
" \\n : repeat 'next' command\n"
"\n");
}
/*
 * Report a problem on stdout.  When @err is a real error its description
 * is printed before @why, separated by a colon; with TDB_SUCCESS only
 * @why is printed.
 */
static void terror(enum TDB_ERROR err, const char *why)
{
	if (err == TDB_SUCCESS) {
		printf("%s\n", why);
		return;
	}
	printf("%s:%s\n", tdb_errorstr(err), why);
}
static void create_tdb(const char *tdbname)
{
union tdb_attribute log_attr;
log_attr.base.attr = TDB_ATTRIBUTE_LOG;
log_attr.base.next = NULL;
log_attr.log.fn = tdb_log;
if (tdb) tdb_close(tdb);
tdb = tdb_open(tdbname, (disable_mmap?TDB_NOMMAP:0),
O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr);
if (!tdb) {
printf("Could not create %s: %s\n", tdbname, strerror(errno));
}
}
static void open_tdb(const char *tdbname)
{
union tdb_attribute log_attr;
log_attr.base.attr = TDB_ATTRIBUTE_LOG;
log_attr.base.next = NULL;
log_attr.log.fn = tdb_log;
if (tdb) tdb_close(tdb);
tdb = tdb_open(tdbname, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600,
&log_attr);
if (!tdb) {
printf("Could not open %s: %s\n", tdbname, strerror(errno));
}
}
/*
 * Insert a new record (TDB_INSERT) into the current database.
 *
 * A key is required.  Missing data is stored as an empty value; the length
 * is forced to zero in that case so a NULL pointer is never paired with a
 * non-zero size.
 */
static void insert_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
{
	TDB_DATA key, dbuf;
	enum TDB_ERROR ecode;

	if ((keyname == NULL) || (keylen == 0)) {
		terror(TDB_SUCCESS, "need key");
		return;
	}

	if (data == NULL) {
		datalen = 0;
	}

	key.dptr = (unsigned char *)keyname;
	key.dsize = keylen;
	dbuf.dptr = (unsigned char *)data;
	dbuf.dsize = datalen;

	ecode = tdb_store(tdb, key, dbuf, TDB_INSERT);
	if (ecode) {
		terror(ecode, "insert failed");
	}
}
static void store_tdb(char *keyname, size_t keylen, char* data, size_t datalen)
{
TDB_DATA key, dbuf;
enum TDB_ERROR ecode;
if ((keyname == NULL) || (keylen == 0)) {
terror(TDB_SUCCESS, "need key");
return;
}
if ((data == NULL) || (datalen == 0)) {
terror(TDB_SUCCESS, "need data");
return;
}
key.dptr = (unsigned char *)keyname;
key.dsize = keylen;
dbuf.dptr = (unsigned char *)data;
dbuf.dsize = datalen;
printf("Storing key:\n");
print_rec(tdb, key, dbuf, NULL);
ecode = tdb_store(tdb, key, dbuf, TDB_REPLACE);
if (ecode) {
terror(ecode, "store failed");
}
}
static void show_tdb(char *keyname, size_t keylen)
{
TDB_DATA key, dbuf;
enum TDB_ERROR ecode;
if ((keyname == NULL) || (keylen == 0)) {
terror(TDB_SUCCESS, "need key");
return;
}
key.dptr = (unsigned char *)keyname;
key.dsize = keylen;
ecode = tdb_fetch(tdb, key, &dbuf);
if (ecode) {
terror(ecode, "fetch failed");
return;
}
print_rec(tdb, key, dbuf, NULL);
free( dbuf.dptr );
}
static void delete_tdb(char *keyname, size_t keylen)
{
TDB_DATA key;
enum TDB_ERROR ecode;
if ((keyname == NULL) || (keylen == 0)) {
terror(TDB_SUCCESS, "need key");
return;
}
key.dptr = (unsigned char *)keyname;
key.dsize = keylen;
ecode = tdb_delete(tdb, key);
if (ecode) {
terror(ecode, "delete failed");
}
}
/*
 * Copy the record stored under the given key from the current database
 * into the existing database @tdbname (TDB_REPLACE), printing the record
 * on the way.  The record is not removed from the current database by
 * this function.
 */
static void move_rec(char *keyname, size_t keylen, char* tdbname)
{
	TDB_DATA key, dbuf;
	struct tdb_context *dst_tdb;
	enum TDB_ERROR ecode;

	if ((keyname == NULL) || (keylen == 0)) {
		terror(TDB_SUCCESS, "need key");
		return;
	}

	if ( !tdbname ) {
		terror(TDB_SUCCESS, "need destination tdb name");
		return;
	}

	key.dptr = (unsigned char *)keyname;
	key.dsize = keylen;

	ecode = tdb_fetch(tdb, key, &dbuf);
	if (ecode) {
		terror(ecode, "fetch failed");
		return;
	}

	print_rec(tdb, key, dbuf, NULL);

	dst_tdb = tdb_open(tdbname, 0, O_RDWR, 0600, NULL);
	if ( !dst_tdb ) {
		terror(TDB_SUCCESS, "unable to open destination tdb");
		/* The fetched buffer is ours to release. */
		free(dbuf.dptr);
		return;
	}

	ecode = tdb_store( dst_tdb, key, dbuf, TDB_REPLACE);
	if (ecode)
		terror(ecode, "failed to move record");
	else
		printf("record moved\n");

	free(dbuf.dptr);
	tdb_close( dst_tdb );
}
/*
 * Traversal callback: print a record's key as text and its data as a hex
 * dump, each preceded by its size.  Always returns 0.
 */
static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
{
	const char *key_bytes = (const char *)key.dptr;
	const char *data_bytes = (const char *)dbuf.dptr;

	printf("\nkey %d bytes\n", (int)key.dsize);
	print_asc(key_bytes, key.dsize);

	printf("\ndata %d bytes\n", (int)dbuf.dsize);
	print_data(data_bytes, dbuf.dsize);

	return 0;
}
/*
 * Traversal callback: print a record's key size and the key as text on a
 * single line.  The data is ignored.  Always returns 0.
 */
static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
{
	const char *key_bytes = (const char *)key.dptr;

	printf("key %d bytes: ", (int)key.dsize);
	print_asc(key_bytes, key.dsize);
	putchar('\n');
	return 0;
}
/*
 * Traversal callback: print a record's key size followed by a hex dump of
 * the key.  The data is ignored.  Always returns 0.
 */
static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
{
	const char *key_bytes = (const char *)key.dptr;

	printf("key %d bytes\n", (int)key.dsize);
	print_data(key_bytes, key.dsize);
	putchar('\n');
	return 0;
}
/* Running sum of data sizes seen by traverse_fn(); never reset in the visible code. */
static int total_bytes;
/*
 * Traversal callback used by the speed test: adds the record's data size
 * to total_bytes and returns 0 so the walk continues.
 */
static int traverse_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
{
total_bytes += dbuf.dsize;
return 0;
}
static void info_tdb(void)
{
enum TDB_ERROR ecode;
char *summary;
ecode = tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &summary);
if (ecode) {
terror(ecode, "Getting summary");
} else {
printf("%s", summary);
free(summary);
}
}
static void speed_tdb(const char *tlimit)
{
unsigned timelimit = tlimit?atoi(tlimit):0;
double t;
int ops;
if (timelimit == 0) timelimit = 5;
ops = 0;
printf("Testing store speed for %u seconds\n", timelimit);
_start_timer();
do {
long int r = random();
TDB_DATA key, dbuf;
key = tdb_mkdata("store test", strlen("store test"));
dbuf.dptr = (unsigned char *)&r;
dbuf.dsize = sizeof(r);
tdb_store(tdb, key, dbuf, TDB_REPLACE);
t = _end_timer();
ops++;
} while (t < timelimit);
printf("%10.3f ops/sec\n", ops/t);
ops = 0;
printf("Testing fetch speed for %u seconds\n", timelimit);
_start_timer();
do {
long int r = random();
TDB_DATA key, dbuf;
key = tdb_mkdata("store test", strlen("store test"));
dbuf.dptr = (unsigned char *)&r;
dbuf.dsize = sizeof(r);
tdb_fetch(tdb, key, &dbuf);
t = _end_timer();
ops++;
} while (t < timelimit);
printf("%10.3f ops/sec\n", ops/t);
ops = 0;
printf("Testing transaction speed for %u seconds\n", timelimit);
_start_timer();
do {
long int r = random();
TDB_DATA key, dbuf;
key = tdb_mkdata("transaction test", strlen("transaction test"));
dbuf.dptr = (unsigned char *)&r;
dbuf.dsize = sizeof(r);
tdb_transaction_start(tdb);
tdb_store(tdb, key, dbuf, TDB_REPLACE);
tdb_transaction_commit(tdb);
t = _end_timer();
ops++;
} while (t < timelimit);
printf("%10.3f ops/sec\n", ops/t);
ops = 0;
printf("Testing traverse speed for %u seconds\n", timelimit);
_start_timer();
do {
tdb_traverse(tdb, traverse_fn, NULL);
t = _end_timer();
ops++;
} while (t < timelimit);
printf("%10.3f ops/sec\n", ops/t);
}
/* Flip the disable_mmap flag and report the new state on stdout. */
static void toggle_mmap(void)
{
	disable_mmap = !disable_mmap;
	fputs(disable_mmap ? "mmap is disabled\n" : "mmap is enabled\n",
	      stdout);
}
/*
 * Print @prompt and read one line from stdin into a static buffer.
 *
 * Returns the line without its trailing newline, or NULL at end of input.
 * A final line that ends at EOF without a newline is still returned.  A
 * line too long for the buffer yields NULL, as before.  The returned
 * buffer is overwritten by the next call.
 */
static char *tdb_getline(const char *prompt)
{
	static char thisline[1024];
	char *nl;

	fputs(prompt, stdout);
	/* The prompt has no newline, so push it out explicitly. */
	fflush(stdout);

	thisline[0] = 0;
	if (fgets(thisline, sizeof(thisline)-1, stdin) == NULL)
		return NULL;

	nl = strchr(thisline, '\n');
	if (nl) {
		*nl = 0;
		return thisline;
	}

	/* No newline: accept it only if input simply ended here. */
	return feof(stdin) ? thisline : NULL;
}
/*
 * Traversal callback used by "erase": deletes the visited record and
 * returns tdb_delete()'s result, so a non-zero result is handed back to
 * the traversal.
 */
static int do_delete_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf,
void *state)
{
return tdb_delete(the_tdb, key);
}
/*
 * Position the iteration at the first key of @the_tdb, storing it in
 * *@pkey, then fetch and print that record.  Any failure is reported as
 * "fetch failed".
 */
static void first_record(struct tdb_context *the_tdb, TDB_DATA *pkey)
{
	TDB_DATA dbuf;
	enum TDB_ERROR ecode;

	ecode = tdb_firstkey(the_tdb, pkey);
	if (!ecode)
		ecode = tdb_fetch(the_tdb, *pkey, &dbuf);
	if (ecode) {
		terror(ecode, "fetch failed");
		return;
	}

	print_rec(the_tdb, *pkey, dbuf, NULL);
	/* The fetched data is ours; only the key is kept for "next". */
	free(dbuf.dptr);
}
/*
 * Advance *@pkey to the key following it in @the_tdb, then fetch and
 * print that record.  Any failure, including running off the end of the
 * database, is reported as "fetch failed".
 *
 * NOTE(review): tdb_nextkey() frees the key it is given; whether *pkey is
 * safe to pass again after a failure should be confirmed against it.
 */
static void next_record(struct tdb_context *the_tdb, TDB_DATA *pkey)
{
	TDB_DATA dbuf;
	enum TDB_ERROR ecode;

	ecode = tdb_nextkey(the_tdb, pkey);
	if (!ecode)
		ecode = tdb_fetch(the_tdb, *pkey, &dbuf);
	if (ecode) {
		terror(ecode, "fetch failed");
		return;
	}

	print_rec(the_tdb, *pkey, dbuf, NULL);
	/* The fetched data is ours; only the key is kept for the next step. */
	free(dbuf.dptr);
}
/*
 * Run the library's integrity check on @the_tdb and print the verdict.
 * Prints an error instead when no database is open.
 */
static void check_db(struct tdb_context *the_tdb)
{
	if (!the_tdb) {
		printf("Error: No database opened!\n");
		return;
	}

	fputs(tdb_check(the_tdb, NULL, NULL) != 0
	      ? "Integrity check for the opened database failed.\n"
	      : "Database integrity is OK.\n",
	      stdout);
}
/*
 * Execute the command held in the globals cmdname / arg1 / arg2.
 *
 * An empty command repeats "next".  Otherwise cmd_table is scanned in
 * order and the first entry whose name is a prefix of cmdname is used;
 * when nothing matches, help is shown.
 *
 * Returns 1 when the caller should stop (quit), 0 otherwise.
 */
static int do_command(void)
{
COMMAND_TABLE *ctp = cmd_table;
enum commands mycmd = CMD_HELP;
int cmd_len;
if (cmdname && strlen(cmdname) == 0) {
mycmd = CMD_NEXT;
} else {
while (ctp->name) {
cmd_len = strlen(ctp->name);
/* Only the first cmd_len characters are compared: a prefix match. */
if (strncmp(ctp->name,cmdname,cmd_len) == 0) {
mycmd = ctp->cmd;
break;
}
ctp++;
}
}
/* Commands that work without an open database are handled first. */
switch (mycmd) {
case CMD_CREATE_TDB:
bIterate = 0;
create_tdb(arg1);
return 0;
case CMD_OPEN_TDB:
bIterate = 0;
open_tdb(arg1);
return 0;
case CMD_SYSTEM:
/* Shell command */
if (system(arg1) == -1) {
terror(TDB_SUCCESS, "system() call failed\n");
}
return 0;
case CMD_QUIT:
return 1;
default:
/* all the rest require a open database */
if (!tdb) {
bIterate = 0;
terror(TDB_SUCCESS, "database not open");
help();
return 0;
}
/* Most commands below also end any first/next iteration. */
switch (mycmd) {
case CMD_TRANSACTION_START:
bIterate = 0;
tdb_transaction_start(tdb);
return 0;
case CMD_TRANSACTION_COMMIT:
bIterate = 0;
tdb_transaction_commit(tdb);
return 0;
case CMD_TRANSACTION_CANCEL:
bIterate = 0;
tdb_transaction_cancel(tdb);
return 0;
case CMD_ERASE:
bIterate = 0;
tdb_traverse(tdb, do_delete_fn, NULL);
return 0;
case CMD_DUMP:
bIterate = 0;
tdb_traverse(tdb, print_rec, NULL);
return 0;
case CMD_INSERT:
bIterate = 0;
insert_tdb(arg1, arg1len,arg2,arg2len);
return 0;
case CMD_MOVE:
bIterate = 0;
move_rec(arg1,arg1len,arg2);
return 0;
case CMD_STORE:
bIterate = 0;
store_tdb(arg1,arg1len,arg2,arg2len);
return 0;
case CMD_SHOW:
bIterate = 0;
show_tdb(arg1, arg1len);
return 0;
case CMD_KEYS:
tdb_traverse(tdb, print_key, NULL);
return 0;
case CMD_HEXKEYS:
tdb_traverse(tdb, print_hexkey, NULL);
return 0;
case CMD_DELETE:
bIterate = 0;
delete_tdb(arg1,arg1len);
return 0;
#if 0
case CMD_LIST_HASH_FREE:
tdb_dump_all(tdb);
return 0;
case CMD_LIST_FREE:
tdb_printfreelist(tdb);
return 0;
#endif
case CMD_INFO:
info_tdb();
return 0;
case CMD_SPEED:
speed_tdb(arg1);
return 0;
case CMD_MMAP:
toggle_mmap();
return 0;
case CMD_FIRST:
/* Starts an iteration; "next" is only honoured after this. */
bIterate = 1;
first_record(tdb, &iterate_kbuf);
return 0;
case CMD_NEXT:
if (bIterate)
next_record(tdb, &iterate_kbuf);
return 0;
case CMD_CHECK:
check_db(tdb);
return 0;
case CMD_HELP:
help();
return 0;
case CMD_CREATE_TDB:
case CMD_OPEN_TDB:
case CMD_SYSTEM:
case CMD_QUIT:
/*
* unhandled commands. cases included here to avoid compiler
* warnings.
*/
return 0;
}
}
return 0;
}
/*
 * Decode backslash escapes in @instring in place.
 *
 * A backslash followed by one or two hex digits becomes the byte with
 * that value; a backslash followed by any other character becomes that
 * character.  A lone backslash at the very end is kept literally.
 *
 * The number of decoded bytes is stored in *@sizep; the decoded bytes may
 * include NUL, so use that length rather than strlen().  The result is
 * also NUL-terminated so it can be used as a C string.
 *
 * Returns @instring.
 */
static char *convert_string(char *instring, size_t *sizep)
{
	static const char hexdigits[] = "0123456789abcdefABCDEF";
	size_t length = 0;
	char *outp, *inp;
	char temp[3];

	outp = inp = instring;

	while (*inp) {
		/*
		 * Only treat the backslash as an escape when something
		 * follows it; otherwise the scan would step past the
		 * terminating NUL.
		 */
		if (*inp == '\\' && inp[1] != '\0') {
			inp++;
			if (strchr(hexdigits, (int)*inp)) {
				temp[0] = *inp++;
				temp[1] = '\0';
				if (*inp && strchr(hexdigits, (int)*inp)) {
					temp[1] = *inp++;
					temp[2] = '\0';
				}
				*outp++ = (char)strtol((const char *)temp,NULL,16);
			} else {
				*outp++ = *inp++;
			}
		} else {
			*outp++ = *inp++;
		}
		length++;
	}

	/* The output never outruns the input, so this stays in bounds. */
	*outp = '\0';
	*sizep = length;
	return instring;
}
/*
 * Entry point.
 *
 *   tool                      interactive, no database open
 *   tool DB                   interactive on DB
 *   tool DB CMD [ARG1 [ARG2]] run a single command on DB
 *
 * Any database still open at the end is closed.  Always returns 0.
 */
int main(int argc, char *argv[])
{
	cmdname = "";
	arg1 = NULL;
	arg1len = 0;
	arg2 = NULL;
	arg2len = 0;

	/* A first argument names the database to open up front. */
	if (argv[1]) {
		cmdname = "open";
		arg1 = argv[1];
		do_command();
		cmdname = "";
		arg1 = NULL;
	}

	switch (argc) {
	case 1:
	case 2:
		/* Interactive mode */
		while ((cmdname = tdb_getline("tdb> "))) {
			arg2 = arg1 = NULL;
			/* Do not carry lengths over from an earlier command. */
			arg1len = arg2len = 0;
			if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) {
				arg1++;
				arg2 = arg1;
				/*
				 * Split at the first unescaped space; a
				 * backslash-space pair stays inside arg1.
				 */
				while (*arg2) {
					if (*arg2 == ' ') {
						*arg2++ = '\0';
						break;
					}
					if ((*arg2++ == '\\') && (*arg2 == ' ')) {
						arg2++;
					}
				}
			}
			if (arg1) arg1 = convert_string(arg1,&arg1len);
			if (arg2) arg2 = convert_string(arg2,&arg2len);
			if (do_command()) break;
		}
		break;
	case 5:
		arg2 = convert_string(argv[4],&arg2len);
		/* fall through */
	case 4:
		arg1 = convert_string(argv[3],&arg1len);
		/* fall through */
	case 3:
		cmdname = argv[2];
		/* fall through */
	default:
		do_command();
		break;
	}

	if (tdb) tdb_close(tdb);

	return 0;
}

View File

@ -0,0 +1,494 @@
/* this tests tdb by doing lots of ops from several simultaneous
writers - that stresses the locking code.
*/
#include "tdb2.h"
#include <stdlib.h>
#include <err.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <fcntl.h>
#include <time.h>
#include <sys/wait.h>
/*
 * Tunables.  Each *_PROB value N is used as "random() % N == 0", i.e. the
 * action is taken roughly once in N tries; an undefined or zero value
 * compiles the action out through the matching #if.
 */
//#define REOPEN_PROB 30
#define DELETE_PROB 8
#define STORE_PROB 4
#define APPEND_PROB 6
#define TRANSACTION_PROB 10
#define TRANSACTION_PREPARE_PROB 2
#define LOCKSTORE_PROB 5
#define TRAVERSE_PROB 20
#define TRAVERSE_MOD_PROB 100
#define TRAVERSE_ABORT_PROB 500
#define CULL_PROB 100
/* Upper bounds for the random key and data lengths (before the NUL). */
#define KEYLEN 3
#define DATALEN 100
/* The database under test in this process. */
static struct tdb_context *db;
/* Non-zero while a transaction started by addrec_db() is open. */
static int in_transaction;
/* Non-zero while inside the traversal started by addrec_db(). */
static int in_traverse;
/* Number of failures seen so far; non-zero stops the test. */
static int error_count;
#if TRANSACTION_PROB
/* Set by -t: a transaction is started whenever none is open. */
static int always_transaction = 0;
#endif
/* Current loop iteration of this child; reported to the parent when killed. */
static int loopnum;
/* Write end of the pipe used to send loopnum to the parent. */
static int count_pipe;
/* Attributes passed to tdb_open(): logging, chained to the hash seed. */
static union tdb_attribute log_attr;
static union tdb_attribute seed_attr;
/*
 * Log callback installed through log_attr: writes the message to stdout
 * and flushes immediately.  The level and private pointer are ignored.
 */
static void tdb_log(struct tdb_context *tdb, enum tdb_log_level level,
const char *message, void *data)
{
fputs(message, stdout);
fflush(stdout);
/* Disabled debugging aid: attach gdb in an xterm to this process. */
#if 0
{
char str[200];
signal(SIGUSR1, SIG_IGN);
sprintf(str,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
system(str);
}
#endif
}
#include "../private.h"
/*
 * SIGSEGV handler: report the faulting address together with the
 * database's current mapping on stderr, pause for a minute if the report
 * was written, then terminate with status 11.
 */
static void segv_handler(int sig, siginfo_t *info, void *p)
{
	char string[100] = "";

	/*
	 * Bounded formatting: with maximum-width values the message does
	 * not fit in the buffer, so truncate rather than overflow.
	 */
	snprintf(string, sizeof(string),
		 "%u: death at %p (map_ptr %p, map_size %zu)\n",
		 getpid(), info->si_addr, db->file->map_ptr,
		 (size_t)db->file->map_size);
	if (write(2, string, strlen(string)) > 0)
		sleep(60);
	_exit(11);
}
/*
 * Report a failure on stderr as "<pid>:<why>:<detail>" and count it.
 * <detail> is the last error of @tdb, or "(no tdb)" when @tdb is NULL.
 * Despite the name, this does not terminate the process.
 */
static void fatal(struct tdb_context *tdb, const char *why)
{
	const char *detail = "(no tdb)";

	if (tdb)
		detail = tdb_errorstr(tdb_error(tdb));

	fprintf(stderr, "%u:%s:%s\n", getpid(), why, detail);
	error_count++;
}
/*
 * Return a newly allocated, NUL-terminated string of @len random
 * lower-case letters drawn from rand().  The caller frees it.
 * Terminates the process if memory cannot be allocated.
 */
static char *randbuf(int len)
{
	char *buf;
	int i;

	buf = malloc(len+1);
	if (buf == NULL) {
		/* Nothing sensible can be tested without memory. */
		perror("malloc");
		exit(1);
	}

	for (i=0;i<len;i++) {
		buf[i] = 'a' + (rand() % 26);
	}
	buf[i] = 0;
	return buf;
}
static void addrec_db(void);
/*
 * Traversal callback that disturbs the database while it is being walked:
 * it may delete the visited record, may run a random operation through
 * addrec_db(), and may abort the traversal by returning 1.
 * Returns 0 to continue otherwise.
 */
static int modify_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
void *state)
{
#if CULL_PROB
if (random() % CULL_PROB == 0) {
tdb_delete(tdb, key);
}
#endif
#if TRAVERSE_MOD_PROB
if (random() % TRAVERSE_MOD_PROB == 0) {
addrec_db();
}
#endif
#if TRAVERSE_ABORT_PROB
if (random() % TRAVERSE_ABORT_PROB == 0)
return 1;
#endif
return 0;
}
/*
 * Perform one randomly chosen operation on the database using a random
 * key and random data.  The candidate operations are tried in a fixed
 * order, each with its own probability; the first one selected runs and
 * the function finishes.  If none is selected, the key is simply fetched.
 * Failures are recorded through fatal().
 */
static void addrec_db(void)
{
int klen, dlen;
char *k, *d;
TDB_DATA key, data;
klen = 1 + (rand() % KEYLEN);
dlen = 1 + (rand() % DATALEN);
k = randbuf(klen);
d = randbuf(dlen);
/* Sizes include the terminating NUL written by randbuf(). */
key.dptr = (unsigned char *)k;
key.dsize = klen+1;
data.dptr = (unsigned char *)d;
data.dsize = dlen+1;
#if REOPEN_PROB
if (in_traverse == 0 && in_transaction == 0 && random() % REOPEN_PROB == 0) {
tdb_reopen_all(0);
goto next;
}
#endif
#if TRANSACTION_PROB
/* Start a transaction when none is open. */
if (in_traverse == 0 && in_transaction == 0 && (always_transaction || random() % TRANSACTION_PROB == 0)) {
if (tdb_transaction_start(db) != 0) {
fatal(db, "tdb_transaction_start failed");
}
in_transaction++;
goto next;
}
/* Commit the open transaction, sometimes with an explicit prepare step. */
if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
if (random() % TRANSACTION_PREPARE_PROB == 0) {
if (tdb_transaction_prepare_commit(db) != 0) {
fatal(db, "tdb_transaction_prepare_commit failed");
}
}
if (tdb_transaction_commit(db) != 0) {
fatal(db, "tdb_transaction_commit failed");
}
in_transaction--;
goto next;
}
/* Or cancel the open transaction. */
if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) {
tdb_transaction_cancel(db);
in_transaction--;
goto next;
}
#endif
#if DELETE_PROB
/* The delete result is not checked. */
if (random() % DELETE_PROB == 0) {
tdb_delete(db, key);
goto next;
}
#endif
#if STORE_PROB
if (random() % STORE_PROB == 0) {
if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
fatal(db, "tdb_store failed");
}
goto next;
}
#endif
#if APPEND_PROB
if (random() % APPEND_PROB == 0) {
if (tdb_append(db, key, data) != 0) {
fatal(db, "tdb_append failed");
}
goto next;
}
#endif
#if LOCKSTORE_PROB
/* Read-modify-write of one key under its chain lock. */
if (random() % LOCKSTORE_PROB == 0) {
tdb_chainlock(db, key);
/* On success 'data' now refers to the fetched buffer, not to 'd'. */
if (tdb_fetch(db, key, &data) != TDB_SUCCESS) {
data.dsize = 0;
data.dptr = NULL;
}
if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
fatal(db, "tdb_store failed");
}
if (data.dptr) free(data.dptr);
tdb_chainunlock(db, key);
goto next;
}
#endif
#if TRAVERSE_PROB
/* FIXME: recursive traverses break transactions? */
if (in_traverse == 0 && random() % TRAVERSE_PROB == 0) {
in_traverse++;
tdb_traverse(db, modify_traverse, NULL);
in_traverse--;
goto next;
}
#endif
/* Nothing else was chosen: plain fetch, discarding the result. */
if (tdb_fetch(db, key, &data) == TDB_SUCCESS)
free(data.dptr);
next:
/* The random key and data buffers are always released here. */
free(k);
free(d);
}
/*
 * Traversal callback used at the end of a run: deletes every visited
 * record (result not checked) and returns 0 so the walk continues.
 */
static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
void *state)
{
tdb_delete(tdb, key);
return 0;
}
/*
 * Print the command-line synopsis to stdout and exit with status 0.
 * The -t option is only listed when transaction support is compiled in.
 */
static void usage(void)
{
printf("Usage: tdbtorture"
#if TRANSACTION_PROB
" [-t]"
#endif
" [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-S]\n");
exit(0);
}
/*
 * SIGUSR1 handler: hand the current loop counter to the parent through
 * count_pipe, then kill this process with SIGUSR2 so the parent can tell
 * the counter was written.
 */
static void send_count_and_suicide(int sig)
{
	/* This ensures our successor can continue where we left off. */
	if (write(count_pipe, &loopnum, sizeof(loopnum)) != sizeof(loopnum))
		/* _exit(), not exit(): only the former is safe in a handler. */
		_exit(2);
	/* This gives a unique signature. */
	kill(getpid(), SIGUSR2);
}
/*
 * Body of one worker process.
 *
 * Opens torture.tdb, seeds the random generators with @seed + @i, and runs
 * addrec_db() for loop numbers @start up to @num_loops, stopping early on
 * the first recorded error.  If no error occurred, the database is then
 * walked once and emptied by two deleting traversals (inside a single
 * transaction when -t was given).
 *
 * Returns the number of errors seen, capped at 100, for use as the exit
 * status.
 */
static int run_child(int i, int seed, unsigned num_loops, unsigned start,
		     int tdb_flags)
{
	struct sigaction act = { .sa_sigaction = segv_handler,
				 .sa_flags = SA_SIGINFO };
	sigaction(SIGSEGV, &act, NULL);

	db = tdb_open("torture.tdb", tdb_flags, O_RDWR | O_CREAT, 0600,
		      &log_attr);
	if (!db) {
		fatal(NULL, "db open failed");
		/* Nothing below can run, or be closed, without a handle. */
		return (error_count < 100 ? error_count : 100);
	}

#if 0
	if (i == 0) {
		printf("pid %i\n", getpid());
		sleep(9);
	} else
		sleep(10);
#endif

	srand(seed + i);
	srandom(seed + i);

	/* Set global, then we're ready to handle being killed. */
	loopnum = start;
	signal(SIGUSR1, send_count_and_suicide);

	for (;loopnum<num_loops && error_count == 0;loopnum++) {
		addrec_db();
	}

	if (error_count == 0) {
		/* A NULL callback just walks the database. */
		tdb_traverse(db, NULL, NULL);
#if TRANSACTION_PROB
		if (always_transaction) {
			while (in_transaction) {
				tdb_transaction_cancel(db);
				in_transaction--;
			}
			if (tdb_transaction_start(db) != 0)
				fatal(db, "tdb_transaction_start failed");
		}
#endif
		tdb_traverse(db, traverse_fn, NULL);
		tdb_traverse(db, traverse_fn, NULL);

#if TRANSACTION_PROB
		if (always_transaction) {
			if (tdb_transaction_commit(db) != 0)
				fatal(db, "tdb_transaction_commit failed");
		}
#endif
	}

	tdb_close(db);

	return (error_count < 100 ? error_count : 100);
}
/*
 * Entry point of the torture test.
 *
 * Options: -n NUM_PROCS, -l NUM_LOOPS, -s SEED, -t (always use
 * transactions), -k (randomly kill and restart workers), -S (TDB_NOSYNC).
 *
 * Removes any old torture.tdb, starts the workers, waits for them
 * (restarting those killed by SIGUSR1/SIGUSR2 from where they stopped)
 * and, if nothing failed, runs a final integrity check and prints "OK".
 *
 * Returns the number of errors detected.
 */
int main(int argc, char * const *argv)
{
	int i, seed = -1;
	int num_loops = 5000;
	int num_procs = 3;
	int c, pfds[2];
	extern char *optarg;
	pid_t *pids;
	int kill_random = 0;
	int *done;
	int tdb_flags = TDB_DEFAULT;

	log_attr.base.attr = TDB_ATTRIBUTE_LOG;
	log_attr.base.next = &seed_attr;
	log_attr.log.fn = tdb_log;
	seed_attr.base.attr = TDB_ATTRIBUTE_SEED;

	while ((c = getopt(argc, argv, "n:l:s:thkS")) != -1) {
		switch (c) {
		case 'n':
			num_procs = strtol(optarg, NULL, 0);
			break;
		case 'l':
			num_loops = strtol(optarg, NULL, 0);
			break;
		case 's':
			seed = strtol(optarg, NULL, 0);
			break;
		case 'S':
			tdb_flags = TDB_NOSYNC;
			break;
		case 't':
#if TRANSACTION_PROB
			always_transaction = 1;
#else
			fprintf(stderr, "Transactions not supported\n");
			usage();
#endif
			break;
		case 'k':
			kill_random = 1;
			break;
		default:
			usage();
		}
	}

	unlink("torture.tdb");

	if (seed == -1) {
		seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
	}
	seed_attr.seed.seed = (((uint64_t)seed) << 32) | seed;

	if (num_procs == 1 && !kill_random) {
		/* Don't fork for this case, makes debugging easier. */
		error_count = run_child(0, seed, num_loops, 0, tdb_flags);
		goto done;
	}

	/* pids[j] / done[j]: pid and last reported loop count of worker j. */
	pids = calloc(num_procs, sizeof(*pids));
	done = calloc(num_procs, sizeof(*done));
	if (num_procs > 0 && (pids == NULL || done == NULL)) {
		perror("calloc");
		exit(1);
	}

	if (pipe(pfds) != 0) {
		perror("Creating pipe");
		exit(1);
	}
	count_pipe = pfds[1];

	for (i=0;i<num_procs;i++) {
		pids[i] = fork();
		if (pids[i] == -1) {
			/*
			 * Never store -1 as a pid: kill(-1, ...) would
			 * signal every process we are allowed to signal.
			 */
			perror("fork");
			while (i-- > 0)
				kill(pids[i], SIGTERM);
			exit(1);
		}
		if (pids[i] == 0) {
			close(pfds[0]);
			if (i == 0) {
				printf("testing with %d processes, %d loops, seed=%d%s\n",
				       num_procs, num_loops, seed,
#if TRANSACTION_PROB
				       always_transaction ? " (all within transactions)" : ""
#else
				       ""
#endif
					);
			}
			exit(run_child(i, seed, num_loops, 0, tdb_flags));
		}
	}

	while (num_procs) {
		int status, j;
		pid_t pid;

		if (error_count != 0) {
			/* try and stop the test on any failure */
			for (j=0;j<num_procs;j++) {
				if (pids[j] != 0) {
					kill(pids[j], SIGTERM);
				}
			}
		}

		pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
		if (pid == 0) {
			struct timespec ts;

			/* Sleep for 1/10 second. */
			ts.tv_sec = 0;
			ts.tv_nsec = 100000000;
			nanosleep(&ts, NULL);

			/* Kill someone. */
			kill(pids[random() % num_procs], SIGUSR1);
			continue;
		}

		if (pid == -1) {
			perror("failed to wait for child\n");
			exit(1);
		}

		for (j=0;j<num_procs;j++) {
			if (pids[j] == pid) break;
		}
		if (j == num_procs) {
			printf("unknown child %d exited!?\n", (int)pid);
			exit(1);
		}
		if (WIFSIGNALED(status)) {
			if (WTERMSIG(status) == SIGUSR2
			    || WTERMSIG(status) == SIGUSR1) {
				/* SIGUSR2 means they wrote to pipe. */
				if (WTERMSIG(status) == SIGUSR2) {
					if (read(pfds[0], &done[j],
						 sizeof(done[j]))
					    != sizeof(done[j]))
						err(1,
						    "Short read from child?");
				}
				pids[j] = fork();
				if (pids[j] == 0)
					exit(run_child(j, seed, num_loops,
						       done[j], tdb_flags));
				if (pids[j] != -1) {
					printf("Restarting child %i for %u-%u\n",
					       j, done[j], num_loops);
					continue;
				}
				/*
				 * Could not restart: report it, then fall
				 * through so the failure is counted and the
				 * slot is retired below.
				 */
				perror("fork");
			}
			printf("child %d exited with signal %d\n",
			       (int)pid, WTERMSIG(status));
			error_count++;
		} else {
			if (WEXITSTATUS(status) != 0) {
				printf("child %d exited with status %d\n",
				       (int)pid, WEXITSTATUS(status));
				error_count++;
			}
		}

		/* Retire slot j, keeping done[] lined up with pids[]. */
		memmove(&pids[j], &pids[j+1],
			(num_procs - j - 1)*sizeof(pids[0]));
		memmove(&done[j], &done[j+1],
			(num_procs - j - 1)*sizeof(done[0]));
		num_procs--;
	}

	free(pids);
	free(done);

done:
	if (error_count == 0) {
		db = tdb_open("torture.tdb", TDB_DEFAULT, O_RDWR | O_CREAT,
			      0600, &log_attr);
		if (!db) {
			fatal(db, "db open failed");
			exit(1);
		}
		if (tdb_check(db, NULL, NULL) != 0) {
			fatal(db, "db check failed");
			exit(1);
		}
		tdb_close(db);
		printf("OK\n");
	}

	return error_count;
}

1308
lib/tdb2/transaction.c Normal file

File diff suppressed because it is too large Load Diff

99
lib/tdb2/traverse.c Normal file
View File

@ -0,0 +1,99 @@
/*
Trivial Database 2: traverse function.
Copyright (C) Rusty Russell 2010
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <ccan/likely/likely.h>
/*
 * Visit each record yielded by first_in_hash()/next_in_hash() in turn,
 * calling fn (when non-NULL) with the record's key and data.
 *
 * tdb: database handle; its last_error field is updated before returning.
 * fn:  optional per-record callback; a non-zero return stops the walk.
 * p:   opaque pointer handed through to fn unchanged.
 *
 * Returns the number of records visited (the record on which fn asked to
 * stop is included in the count).  If iteration ends with anything other
 * than TDB_ERR_NOEXIST, that error code is stored in tdb->last_error and
 * returned instead of the count.
 */
int64_t tdb_traverse_(struct tdb_context *tdb,
		      int (*fn)(struct tdb_context *,
				TDB_DATA, TDB_DATA, void *),
		      void *p)
{
	struct traverse_info tinfo;
	struct tdb_data key, data;
	enum TDB_ERROR err;
	int64_t visited = 0;

	key.dptr = NULL;
	err = first_in_hash(tdb, &tinfo, &key, &data.dsize);
	while (err == TDB_SUCCESS) {
		int stop;

		/*
		 * The data pointer is derived from the key buffer: it starts
		 * immediately after the key bytes.  (Presumably the iterator
		 * returns key and data in one allocation -- confirm against
		 * first_in_hash/next_in_hash.)
		 */
		data.dptr = key.dptr + key.dsize;
		visited++;

		stop = (fn != NULL) && fn(tdb, key, data, p);

		/* The key buffer is released after every record. */
		free(key.dptr);

		if (stop) {
			tdb->last_error = TDB_SUCCESS;
			return visited;
		}

		err = next_in_hash(tdb, &tinfo, &key, &data.dsize);
	}

	/* TDB_ERR_NOEXIST is the normal end-of-iteration indication. */
	if (err == TDB_ERR_NOEXIST) {
		tdb->last_error = TDB_SUCCESS;
		return visited;
	}

	return tdb->last_error = err;
}
enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key)
{
struct traverse_info tinfo;
return tdb->last_error = first_in_hash(tdb, &tinfo, key, NULL);
}
/*
 * Replace *key with the key that follows it in traversal order.
 *
 * The current key is first looked up with find_and_lock() to recover the
 * traversal position; that lock is then dropped and next_in_hash() is
 * called.  Locking therefore happens twice, which is not very efficient;
 * caching the last key and tinfo would avoid it.
 *
 * The buffer at key->dptr is freed unconditionally, including when the
 * lookup fails.  The result code is stored in tdb->last_error and returned.
 */
enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key)
{
	struct tdb_used_record rec;
	struct hash_info h;
	struct traverse_info tinfo;

	tinfo.prev = find_and_lock(tdb, *key, F_RDLCK, &h, &rec, &tinfo);

	/* The incoming key buffer is no longer needed once looked up. */
	free(key->dptr);

	if (!TDB_OFF_IS_ERR(tinfo.prev)) {
		/* Lookup succeeded: release its read lock, then advance. */
		tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
		return tdb->last_error = next_in_hash(tdb, &tinfo, key, NULL);
	}

	/* The lookup result itself carries the error code. */
	return tdb->last_error = tinfo.prev;
}
/*
 * Traverse callback used by tdb_wipe_all(): delete the record for key.
 *
 * The tdb_delete() result is written to *ecode.  Returns 0 when the delete
 * succeeded and 1 otherwise; a non-zero return stops the traversal.
 * The data argument is unused.
 */
static int wipe_one(struct tdb_context *tdb,
		    TDB_DATA key, TDB_DATA data, enum TDB_ERROR *ecode)
{
	enum TDB_ERROR result = tdb_delete(tdb, key);

	*ecode = result;
	if (result == TDB_SUCCESS)
		return 0;
	return 1;
}
/*
 * Delete every record in the database.
 *
 * An all-record write lock is taken (waiting if necessary), each record
 * is deleted through a traversal with wipe_one(), and the lock is then
 * released.  If the lock cannot be taken, its error is returned and no
 * traversal is attempted.
 *
 * The returned code is also stored in tdb->last_error.  It is the lock
 * error, the traversal's own error (a negative count), the error from the
 * delete that stopped the traversal, or TDB_SUCCESS.
 */
enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb)
{
	int64_t count;
	enum TDB_ERROR ecode;

	ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
	if (ecode == TDB_SUCCESS) {
		/* FIXME: Be smarter. */
		count = tdb_traverse(tdb, wipe_one, &ecode);

		/*
		 * A negative count is a traversal error; otherwise ecode
		 * already holds whatever wipe_one last stored (or the
		 * successful lock result if no record was visited).
		 */
		if (count < 0)
			ecode = count;

		tdb_allrecord_unlock(tdb, F_WRLCK);
	}

	return tdb->last_error = ecode;
}