/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Volker Lendecke 2012,2013
   Copyright (C) Stefan Metzmacher 2013,2014
   Copyright (C) Michael Adam 2014

   ** NOTE! The following LGPL license applies to the tdb
   ** library. This does NOT imply that all of Samba is released
   ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#include "tdb_private.h"
#include "system/threads.h"

#ifdef USE_TDB_MUTEX_LOCKING

/*
 * If we run with mutexes, we store the "struct tdb_mutexes" at the
 * beginning of the file. We store an additional tdb_header right
 * beyond the mutex area, page aligned. All the offsets within the tdb
 * are relative to the area behind the mutex area. tdb->map_ptr points
 * behind the mutex area as well, so the read and write path in the
 * mutex case can remain unchanged.
 *
 * Early in the mutex development the mutexes were placed between the hash
 * chain pointers and the real tdb data. This had two drawbacks: First, it
 * made pointer calculations more complex. Second, we had to mmap the mutex
 * area twice. One mapping was the normal map_ptr in the tdb, which is
 * frequently remapped from within tdb_oob. At least the Linux glibc robust
 * mutex code assumes constant pointers in memory, so a constantly changing
 * mmap area destroys the mutex list. Thus we had to mmap the first bytes of
 * the file with a second mmap call. With that scheme, very weird errors
 * happened that could be fixed by doing the mutex mmap in a second file:
 * mapping the same memory area twice did not reliably access the same
 * physical page, and looking at the mutexes in gdb, old data showed up
 * after some re-mapping. To avoid a separate mutex file, the code now puts
 * the real content of the tdb file after the mutex area. This way we do
 * not have overlapping mmap areas: the mutex area is mmapped once and
 * never changed, while the tdb data area's mmap is constantly changed but
 * does not overlap.
 */

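/*
 * The resulting on-disk layout, sketched for illustration (the mutex
 * area size is tdb_mutex_size(), i.e. page aligned and dependent on
 * hash_size; nothing here is to scale):
 *
 *   +--------------------------+  file offset 0
 *   | struct tdb_mutexes       |  hdr, allrecord_mutex,
 *   | (padded to page size)    |  hashchains[1 + hash_size]
 *   +--------------------------+  file offset tdb_mutex_size(tdb)
 *   | struct tdb_header        |  <- tdb offset 0, tdb->map_ptr
 *   | hash chain pointers      |
 *   | freelist, records ...    |
 *   +--------------------------+
 */
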
struct tdb_mutexes {
	struct tdb_header hdr;

	/* protect allrecord_lock */
	pthread_mutex_t allrecord_mutex;

	/*
	 * F_UNLCK: free,
	 * F_RDLCK: shared,
	 * F_WRLCK: exclusive
	 */
	short int allrecord_lock;

	/*
	 * Index 0 is the freelist mutex, followed by
	 * one mutex per hashchain.
	 */
	pthread_mutex_t hashchains[1];
};

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	size_t mutex_size;

	if (!tdb_have_mutexes(tdb)) {
		return 0;
	}

	mutex_size = sizeof(struct tdb_mutexes);
	mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);

	return TDB_ALIGN(mutex_size, tdb->page_size);
}

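/*
 * Worked example (illustrative only, sizes are platform dependent):
 * with hash_size = 10000, sizeof(pthread_mutex_t) == 40 and
 * page_size == 4096, this is sizeof(struct tdb_mutexes) plus
 * 10000 * 40 bytes, rounded up to the next 4096-byte boundary.
 * Note that hashchains[1] in the struct already accounts for the
 * extra freelist mutex at index 0.
 */
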
/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
			    unsigned *idx)
{
	/*
	 * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
	 * the 4 bytes of the freelist start and the hash chain that is about
	 * to be locked. See lock_offset() where the freelist is -1 vs the
	 * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
	 * the tdb file itself as data, we need to adjust the offset here.
	 */
	const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

	if (!tdb_have_mutexes(tdb)) {
		return false;
	}
	if (len != 1) {
		/* Possibly the allrecord lock */
		return false;
	}
	if (off < freelist_lock_ofs) {
		/* One of the special locks */
		return false;
	}
	if (tdb->hash_size == 0) {
		/* tdb not initialized yet, called from tdb_open_ex() */
		return false;
	}
	if (off >= TDB_DATA_START(tdb->hash_size)) {
		/* Single record lock from traverses */
		return false;
	}

	/*
	 * Now we know it's a freelist or hash chain lock. Those are always 4
	 * byte aligned. Paranoia check.
	 */
	if ((off % sizeof(tdb_off_t)) != 0) {
		abort();
	}

	/*
	 * Re-index the fcntl offset into an offset into the mutex array
	 */
	off -= freelist_lock_ofs; /* rebase to index 0 */
	off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */

	*idx = off;
	return true;
}

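/*
 * Example of the resulting mapping (assuming the 4-byte tdb_off_t): an
 * fcntl lock at off == FREELIST_TOP - 4 yields idx 0, the freelist
 * mutex; off == FREELIST_TOP yields idx 1, the mutex for hash chain 0;
 * and so on up to hashchains[hash_size] for the last chain.
 */
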
static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
	int i;

	for (i=0; i < tdb->num_lockrecs; i++) {
		bool ret;
		unsigned idx;

		ret = tdb_mutex_index(tdb,
				      tdb->lockrecs[i].off,
				      tdb->lockrecs[i].count,
				      &idx);
		if (!ret) {
			continue;
		}

		if (idx == 0) {
			/* this is the freelist mutex */
			continue;
		}

		return true;
	}

	return false;
}

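/*
 * Background for the EOWNERDEAD handling below: the mutexes are
 * created with PTHREAD_MUTEX_ROBUST, so pthread_mutex_lock() and
 * pthread_mutex_trylock() return EOWNERDEAD (while still acquiring
 * the mutex) when the previous owner died holding it. Calling
 * pthread_mutex_consistent() then marks the mutex usable again.
 */
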
static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(m);
	} else {
		ret = pthread_mutex_trylock(m);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * For chainlocks, we don't do any cleanup (yet?)
	 */
	return pthread_mutex_consistent(m);
}

static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(&m->allrecord_mutex);
	} else {
		ret = pthread_mutex_trylock(&m->allrecord_mutex);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * The allrecord lock holder died. We need to reset the allrecord_lock
	 * to F_UNLCK. This should also be the indication for
	 * tdb_needs_recovery.
	 */
	m->allrecord_lock = F_UNLCK;

	return pthread_mutex_consistent(&m->allrecord_mutex);
}

bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		    bool waitflag, int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;
	bool allrecord_ok;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

again:
	ret = chain_mutex_lock(chain, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		errno = ret;
		goto fail;
	}

	if (idx == 0) {
		/*
		 * This is a freelist lock, which is independent of
		 * the allrecord lock. So we're done once we have the
		 * freelist mutex.
		 */
		*pret = 0;
		return true;
	}

	if (tdb_have_mutex_chainlocks(tdb)) {
		/*
		 * We can only check the allrecord lock once. If we do it with
		 * one chain mutex locked, we will deadlock with the allrecord
		 * locker process in the following way: We lock the first hash
		 * chain, we check for the allrecord lock. We keep the hash
		 * chain locked. Then the allrecord locker locks the
		 * allrecord_mutex. It walks the list of chain mutexes,
		 * locking them all in sequence. Meanwhile, we have the chain
		 * mutex locked, so the allrecord locker blocks trying to lock
		 * our chain mutex. Then we come in and try to lock the second
		 * chain lock, which in most cases will be the freelist. We
		 * see that the allrecord lock is locked and put ourselves on
		 * the allrecord_mutex. This will never be signalled though
		 * because the allrecord locker waits for us to give up the
		 * chain lock.
		 */

		*pret = 0;
		return true;
	}

	/*
	 * Check if someone has the allrecord lock: queue if so.
	 */

	allrecord_ok = false;

	if (m->allrecord_lock == F_UNLCK) {
		/*
		 * allrecord lock not taken
		 */
		allrecord_ok = true;
	}

	if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
		/*
		 * allrecord shared lock taken, but we only want to read
		 */
		allrecord_ok = true;
	}

	if (allrecord_ok) {
		*pret = 0;
		return true;
	}

	ret = pthread_mutex_unlock(chain);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(chain_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	ret = allrecord_mutex_lock(m, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		if (waitflag || (ret != EAGAIN)) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
				 "(allrecord_mutex) failed: %s\n",
				 waitflag ? "" : "try_", strerror(ret)));
		}
		errno = ret;
		goto fail;
	}
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	goto again;

fail:
	*pret = -1;
	return true;
}

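/*
 * Expected caller pattern, as a sketch (the real fcntl-lock wrappers
 * live in lock.c and may differ in detail): callers first offer the
 * byte range to the mutex code and only fall back to fcntl locking
 * for ranges no mutex covers:
 *
 *	int pret;
 *
 *	if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &pret)) {
 *		return pret;	(the mutex path handled it)
 *	}
 *	... fall back to fcntl() based locking ...
 */
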
bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		      int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

	ret = pthread_mutex_unlock(chain);
	if (ret == 0) {
		*pret = 0;
		return true;
	}
	errno = ret;
	*pret = -1;
	return true;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;
	bool waitflag = (flags & TDB_LOCK_WAIT);
	int saved_errno;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	if (flags & TDB_LOCK_MARK_ONLY) {
		return 0;
	}

	ret = allrecord_mutex_lock(m, waitflag);
	if (!waitflag && (ret == EBUSY)) {
		errno = EAGAIN;
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}
	if (ret != 0) {
		if (!(flags & TDB_LOCK_PROBE)) {
			TDB_LOG((tdb, TDB_DEBUG_TRACE,
				 "allrecord_mutex_lock() failed: %s\n",
				 strerror(ret)));
		}
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}

	if (m->allrecord_lock != F_UNLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		goto fail_unlock_allrecord_mutex;
	}
	m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;

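	/*
	 * Sweep over all chain mutexes, taking and immediately dropping
	 * each one. This drains out lockers that grabbed a chain before
	 * allrecord_lock was set above; anyone arriving afterwards sees
	 * allrecord_lock != F_UNLCK in tdb_mutex_lock() and queues on
	 * allrecord_mutex instead, so we don't need to keep the chain
	 * mutexes held.
	 */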
	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, waitflag);
		if (!waitflag && (ret == EBUSY)) {
			errno = EAGAIN;
			goto fail_unroll_allrecord_lock;
		}
		if (ret != 0) {
			if (!(flags & TDB_LOCK_PROBE)) {
				TDB_LOG((tdb, TDB_DEBUG_TRACE,
					 "chain_mutex_lock() failed: %s\n",
					 strerror(ret)));
			}
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}
	}
	/*
	 * We leave this routine with m->allrecord_mutex locked
	 */
	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_UNLCK;

fail_unlock_allrecord_mutex:
	saved_errno = errno;
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
	}
	errno = saved_errno;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only caller tdb_allrecord_upgrade()
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_RDLCK) {
		tdb->ecode = TDB_ERR_LOCK;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	m->allrecord_lock = F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, true);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}
	}

	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_RDLCK;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;

	/*
	 * Our only caller tdb_allrecord_upgrade() (in the error case)
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_WRLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return;
	}

	m->allrecord_lock = F_RDLCK;
	return;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	short old;
	int ret;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only callers tdb_allrecord_unlock() and
	 * tdb_allrecord_lock() (in the error path)
	 * guarantee that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	old = m->allrecord_lock;
	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		m->allrecord_lock = old;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		return -1;
	}
	return 0;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	struct tdb_mutexes *m;
	pthread_mutexattr_t ma;
	uint32_t i;
	int ret;

	ret = tdb_mutex_mmap(tdb);
	if (ret == -1) {
		return -1;
	}
	m = tdb->mutexes;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto fail_munmap;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto fail;
	}

	for (i=0; i<tdb->hash_size+1; i++) {
		pthread_mutex_t *chain = &m->hashchains[i];

		ret = pthread_mutex_init(chain, &ma);
		if (ret != 0) {
			goto fail;
		}
	}

	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
	if (ret != 0) {
		goto fail;
	}
	ret = 0;
fail:
	pthread_mutexattr_destroy(&ma);
fail_munmap:

	if (ret == 0) {
		return 0;
	}

	tdb_mutex_munmap(tdb);

	errno = ret;
	return -1;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	size_t len;
	void *ptr;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	if (tdb->mutexes != NULL) {
		return 0;
	}

	ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
		   tdb->fd, 0);
	if (ptr == MAP_FAILED) {
		return -1;
	}
	tdb->mutexes = (struct tdb_mutexes *)ptr;

	return 0;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	size_t len;
	int ret;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	ret = munmap(tdb->mutexes, len);
	if (ret == -1) {
		return -1;
	}
	tdb->mutexes = NULL;

	return 0;
}

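/*
 * Mutex capability is probed in two stages: tdb_mutex_locking_supported()
 * below checks within this process that the pthread implementation
 * offers real process-shared, robust, error-checking mutexes (and not
 * just libc stubs), while tdb_runtime_check_for_robust_mutexes()
 * additionally forks a child to verify that EOWNERDEAD is actually
 * delivered when a lock holder dies. Both cache their result in
 * tdb_mutex_locking_cached.
 */
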
static bool tdb_mutex_locking_cached;

static bool tdb_mutex_locking_supported(void)
{
	pthread_mutexattr_t ma;
	pthread_mutex_t m;
	int ret;
	static bool initialized;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		return false;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_init(&m, &ma);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_lock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}
	/*
	 * This makes sure we have real mutexes
	 * from a threading library instead of just
	 * stubs from libc.
	 */
	ret = pthread_mutex_lock(&m);
	if (ret != EDEADLK) {
		goto cleanup_lock;
	}
	ret = pthread_mutex_unlock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}

	tdb_mutex_locking_cached = true;
	goto cleanup_m;

cleanup_lock:
	pthread_mutex_unlock(&m);
cleanup_m:
	pthread_mutex_destroy(&m);
cleanup_ma:
	pthread_mutexattr_destroy(&ma);
	return tdb_mutex_locking_cached;
}

static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;

static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
					    void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
	struct sigaction act;
	struct sigaction oldact;

	memset(&act, '\0', sizeof(act));

	act.sa_handler = handler;
#ifdef SA_RESTART
	act.sa_flags = SA_RESTART;
#endif
	sigemptyset(&act.sa_mask);
	sigaddset(&act.sa_mask, SIGCHLD);
	sigaction(SIGCHLD, &act, &oldact);
	if (p_old_handler) {
		*p_old_handler = oldact.sa_handler;
	}
	return true;
#else /* !HAVE_SIGACTION */
	return false;
#endif
}

static void tdb_robust_mutex_handler(int sig)
{
	pid_t child_pid = tdb_robust_mutex_pid;

	if (child_pid != -1) {
		pid_t pid;

		pid = waitpid(child_pid, NULL, WNOHANG);
		if (pid == -1) {
			switch (errno) {
			case ECHILD:
				tdb_robust_mutex_pid = -1;
				return;

			default:
				return;
			}
		}
		if (pid == child_pid) {
			tdb_robust_mutex_pid = -1;
			return;
		}
	}

	if (tdb_robust_mutext_old_handler == SIG_DFL) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_IGN) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_ERR) {
		return;
	}

	tdb_robust_mutext_old_handler(sig);
}

static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
{
	int options = WNOHANG;

	if (*child_pid == -1) {
		return;
	}

	while (tdb_robust_mutex_pid > 0) {
		pid_t pid;

		/*
		 * First we try with WNOHANG, as the process might not exist
		 * anymore. Once we've sent SIGKILL we block waiting for the
		 * exit.
		 */
		pid = waitpid(*child_pid, NULL, options);
		if (pid == -1) {
			if (errno == EINTR) {
				continue;
			} else if (errno == ECHILD) {
				break;
			} else {
				abort();
			}
		}
		if (pid == *child_pid) {
			break;
		}

		kill(*child_pid, SIGKILL);
		options = 0;
	}

	tdb_robust_mutex_pid = -1;
	*child_pid = -1;
}

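/*
 * Typical use of the check below, as a sketch (flag and function
 * names taken from the public tdb API): decide once at open time
 * whether it is safe to request mutex locking:
 *
 *	if (tdb_runtime_check_for_robust_mutexes()) {
 *		tdb_flags |= TDB_MUTEX_LOCKING;
 *	}
 *	tdb = tdb_open_ex(name, hash_size, tdb_flags, ...);
 */
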
_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	void *ptr = NULL;
	pthread_mutex_t *m = NULL;
	pthread_mutexattr_t ma;
	int ret = 1;
	int pipe_down[2] = { -1, -1 };
	int pipe_up[2] = { -1, -1 };
	ssize_t nread;
	char c = 0;
	bool ok;
	static bool initialized;
	pid_t saved_child_pid = -1;
	bool cleanup_ma = false;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ok = tdb_mutex_locking_supported();
	if (!ok) {
		return false;
	}

	tdb_mutex_locking_cached = false;

	ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
	if (ptr == MAP_FAILED) {
		return false;
	}

	ret = pipe(pipe_down);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pipe(pipe_up);
	if (ret != 0) {
		goto cleanup;
	}

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto cleanup;
	}
	cleanup_ma = true;
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutex_init(ptr, &ma);
	if (ret != 0) {
		goto cleanup;
	}
	m = (pthread_mutex_t *)ptr;

	if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
			&tdb_robust_mutext_old_handler) == false) {
		goto cleanup;
	}

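	/*
	 * Handshake between parent and child (descriptive summary of
	 * the code below): the child locks the shared mutex, reports
	 * the lock result through pipe_up and waits on pipe_down. The
	 * parent verifies the mutex is busy, releases the child via
	 * pipe_down, and the child exits while still holding the
	 * mutex. The parent then expects trylock to fail with
	 * EOWNERDEAD, recovers the mutex with pthread_mutex_consistent()
	 * and only then declares robust mutexes usable.
	 */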
	tdb_robust_mutex_pid = fork();
	saved_child_pid = tdb_robust_mutex_pid;
	if (tdb_robust_mutex_pid == 0) {
		size_t nwritten;
		close(pipe_down[1]);
		close(pipe_up[0]);
		ret = pthread_mutex_lock(m);
		nwritten = write(pipe_up[1], &ret, sizeof(ret));
		if (nwritten != sizeof(ret)) {
			_exit(1);
		}
		if (ret != 0) {
			_exit(1);
		}
		nread = read(pipe_down[0], &c, 1);
		if (nread != 1) {
			_exit(1);
		}
		/* leave locked */
		_exit(0);
	}
	if (tdb_robust_mutex_pid == -1) {
		goto cleanup;
	}
	close(pipe_down[0]);
	pipe_down[0] = -1;
	close(pipe_up[1]);
	pipe_up[1] = -1;

	nread = read(pipe_up[0], &ret, sizeof(ret));
	if (nread != sizeof(ret)) {
		goto cleanup;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EBUSY) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup;
	}

	if (write(pipe_down[1], &c, 1) != 1) {
		goto cleanup;
	}

	nread = read(pipe_up[0], &c, 1);
	if (nread != 0) {
		goto cleanup;
	}

	tdb_robust_mutex_wait_for_child(&saved_child_pid);

	ret = pthread_mutex_trylock(m);
	if (ret != EOWNERDEAD) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup;
	}

	ret = pthread_mutex_consistent(m);
	if (ret != 0) {
		goto cleanup;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EDEADLK && ret != EBUSY) {
		pthread_mutex_unlock(m);
		goto cleanup;
	}

	ret = pthread_mutex_unlock(m);
	if (ret != 0) {
		goto cleanup;
	}

	tdb_mutex_locking_cached = true;

cleanup:
	/*
	 * Note that we don't reset the signal handler; we just reset
	 * tdb_robust_mutex_pid to -1. This is ok as this code path is only
	 * called once per process.
	 *
	 * Leaving our signal handler avoids races with other threads
	 * potentially setting up their SIGCHLD handlers.
	 *
	 * The worst thing that can happen is that the other newer signal
	 * handler will get the SIGCHLD signal for our child and/or reap the
	 * child with a wait() function. tdb_robust_mutex_wait_for_child()
	 * handles the case where waitpid returns ECHILD.
	 */
	tdb_robust_mutex_wait_for_child(&saved_child_pid);

	if (m != NULL) {
		pthread_mutex_destroy(m);
	}
	if (cleanup_ma) {
		pthread_mutexattr_destroy(&ma);
	}
	if (pipe_down[0] != -1) {
		close(pipe_down[0]);
	}
	if (pipe_down[1] != -1) {
		close(pipe_down[1]);
	}
	if (pipe_up[0] != -1) {
		close(pipe_up[0]);
	}
	if (pipe_up[1] != -1) {
		close(pipe_up[1]);
	}
	if (ptr != NULL) {
		munmap(ptr, sizeof(pthread_mutex_t));
	}

	return tdb_mutex_locking_cached;
}

#else

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	return 0;
}

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return false;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	return;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	return false;
}

#endif