samba-mirror/lib/tdb/common/mutex.c
Ralph Boehme 19b193ebc9 tdb: runtime check for robust mutexes may hang in threaded programs
The current runtime check for robust mutexes in
tdb_runtime_check_for_robust_mutexes() is not thread-safe.

When called in a multi-threaded program where another thread doesn't
have SIGCHLD blocked, we may end up hung in sigsuspend(), waiting for
the SIGCHLD of a child process when the signal was delivered to
another thread.

Revert to the previous behaviour of waiting for the child instead of
waiting for the SIGCHLD signal.

Ensure the pid we wait for is not reset to -1 in a TOCTOU race with
the signal handler.

Check whether waitpid() returns ECHILD, which can happen if the signal
handler is run by more than one thread in parallel (yes, this can
happen) or if tdb_robust_mutex_wait_for_child() and the signal handler
are racing.
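
For illustration, the removed wait was roughly equivalent to this
sketch (the mask variable here is illustrative), which hangs as soon
as the SIGCHLD is delivered to a thread outside the loop:

    while (tdb_robust_mutex_pid != -1) {
        sigsuspend(&mask);    /* may never return */
    }

waitpid() on a specific pid, by contrast, does not depend on which
thread received the signal.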

Bug: https://bugzilla.samba.org/show_bug.cgi?id=12593

Pair-programmed-with: Stefan Metzmacher <metze@samba.org>

Signed-off-by: Ralph Boehme <slow@samba.org>
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Andreas Schneider <asn@samba.org>
2017-04-27 14:52:16 +02:00


/*
Unix SMB/CIFS implementation.

trivial database library

Copyright (C) Volker Lendecke 2012,2013
Copyright (C) Stefan Metzmacher 2013,2014
Copyright (C) Michael Adam 2014

** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
#include "system/threads.h"
#ifdef USE_TDB_MUTEX_LOCKING
/*
* If we run with mutexes, we store the "struct tdb_mutexes" at the
* beginning of the file. We store an additional tdb_header right
* beyond the mutex area, page aligned. All the offsets within the tdb
* are relative to the area behind the mutex area. tdb->map_ptr points
* behind the mmap area as well, so the read and write path in the
* mutex case can remain unchanged.
*
* Early in the mutex development the mutexes were placed between the hash
* chain pointers and the real tdb data. This had two drawbacks: First, it
* made pointer calculations more complex. Second, we had to mmap the mutex
* area twice. One was the normal map_ptr in the tdb. This frequently changed
* from within tdb_oob. At least the Linux glibc robust mutex code assumes
* constant pointers in memory, so a constantly changing mmap area destroys
* the mutex list. So we had to mmap the first bytes of the file with a second
* mmap call. With that scheme, very weird errors happened that could be
* easily fixed by doing the mutex mmap in a second file. It seemed that
* mapping the same memory area twice does not end up in accessing the same
* physical page, looking at the mutexes in gdb it seemed that old data showed
* up after some re-mapping. To avoid a separate mutex file, the code now puts
* the real content of the tdb file after the mutex area. This way we do not
* have overlapping mmap areas, the mutex area is mmapped once and not
* changed, the tdb data area's mmap is constantly changed but does not
* overlap.
*/
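/*
* Illustrative layout sketch, derived from the description above
* (offsets not to scale):
*
*   offset 0:      struct tdb_mutexes, which begins with its own
*                  tdb_header (the hdr field below), followed by the
*                  allrecord_mutex and the hashchain mutex array
*   page aligned:  the real tdb_header
*                  freelist/hash chain pointers and records
*
* tdb->map_ptr and all tdb offsets are relative to the page-aligned
* area behind the mutexes.
*/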
struct tdb_mutexes {
struct tdb_header hdr;
/* protect allrecord_lock */
pthread_mutex_t allrecord_mutex;
/*
* F_UNLCK: free,
* F_RDLCK: shared,
* F_WRLCK: exclusive
*/
short int allrecord_lock;
/*
* Index 0 is the freelist mutex, followed by
* one mutex per hashchain.
*/
pthread_mutex_t hashchains[1];
};
bool tdb_have_mutexes(struct tdb_context *tdb)
{
return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}
size_t tdb_mutex_size(struct tdb_context *tdb)
{
size_t mutex_size;
if (!tdb_have_mutexes(tdb)) {
return 0;
}
mutex_size = sizeof(struct tdb_mutexes);
mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);
return TDB_ALIGN(mutex_size, tdb->page_size);
}
/*
* Get the index for a chain mutex
*/
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
unsigned *idx)
{
/*
* Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
* the 4 bytes of the freelist start and the hash chain that is about
* to be locked. See lock_offset() where the freelist is -1 vs the
* "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
* the tdb file itself as data, we need to adjust the offset here.
*/
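/*
* Worked example of the re-indexing below, with
* sizeof(tdb_off_t) == 4: the freelist fcntl lock at offset
* FREELIST_TOP - 4 maps to idx 0; the lock for hash bucket b at
* offset FREELIST_TOP + 4*b maps to idx b + 1.
*/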
const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);
if (!tdb_have_mutexes(tdb)) {
return false;
}
if (len != 1) {
/* Possibly the allrecord lock */
return false;
}
if (off < freelist_lock_ofs) {
/* One of the special locks */
return false;
}
if (tdb->hash_size == 0) {
/* tdb not initialized yet, called from tdb_open_ex() */
return false;
}
if (off >= TDB_DATA_START(tdb->hash_size)) {
/* Single record lock from traverses */
return false;
}
/*
* Now we know it's a freelist or hash chain lock. Those are always 4
* byte aligned. Paranoia check.
*/
if ((off % sizeof(tdb_off_t)) != 0) {
abort();
}
/*
* Re-index the fcntl offset into an offset into the mutex array
*/
off -= freelist_lock_ofs; /* rebase to index 0 */
off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */
*idx = off;
return true;
}
static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
size_t i;
for (i=0; i < tdb->num_lockrecs; i++) {
bool ret;
unsigned idx;
ret = tdb_mutex_index(tdb,
tdb->lockrecs[i].off,
tdb->lockrecs[i].count,
&idx);
if (!ret) {
continue;
}
if (idx == 0) {
/* this is the freelist mutex */
continue;
}
return true;
}
return false;
}
static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
int ret;
if (waitflag) {
ret = pthread_mutex_lock(m);
} else {
ret = pthread_mutex_trylock(m);
}
if (ret != EOWNERDEAD) {
return ret;
}
/*
* For chainlocks, we don't do any cleanup (yet?)
*/
return pthread_mutex_consistent(m);
}
static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
int ret;
if (waitflag) {
ret = pthread_mutex_lock(&m->allrecord_mutex);
} else {
ret = pthread_mutex_trylock(&m->allrecord_mutex);
}
if (ret != EOWNERDEAD) {
return ret;
}
/*
* The allrecord lock holder died. We need to reset the allrecord_lock
* to F_UNLCK. This should also be the indication for
* tdb_needs_recovery.
*/
m->allrecord_lock = F_UNLCK;
return pthread_mutex_consistent(&m->allrecord_mutex);
}
bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
bool waitflag, int *pret)
{
struct tdb_mutexes *m = tdb->mutexes;
pthread_mutex_t *chain;
int ret;
unsigned idx;
bool allrecord_ok;
if (!tdb_mutex_index(tdb, off, len, &idx)) {
return false;
}
chain = &m->hashchains[idx];
again:
ret = chain_mutex_lock(chain, waitflag);
if (ret == EBUSY) {
ret = EAGAIN;
}
if (ret != 0) {
errno = ret;
goto fail;
}
if (idx == 0) {
/*
* This is a freelist lock, which is independent of
* the allrecord lock. So we're done once we got the
* freelist mutex.
*/
*pret = 0;
return true;
}
if (tdb_have_mutex_chainlocks(tdb)) {
/*
* We can only check the allrecord lock once. If we do it with
* one chain mutex locked, we will deadlock with the allrecord
* locker process in the following way: We lock the first hash
* chain, we check for the allrecord lock. We keep the hash
* chain locked. Then the allrecord locker locks the
* allrecord_mutex. It walks the list of chain mutexes,
* locking them all in sequence. Meanwhile, we have the chain
* mutex locked, so the allrecord locker blocks trying to lock
* our chain mutex. Then we come in and try to lock the second
* chain lock, which in most cases will be the freelist. We
* see that the allrecord lock is locked and put ourselves on
* the allrecord_mutex. This will never be signalled though
* because the allrecord locker waits for us to give up the
* chain lock.
*/
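/*
* Condensed timeline of that deadlock (illustrative):
*
*   us:                         allrecord locker:
*   lock chain mutex A
*                               lock allrecord_mutex
*                               lock chain mutexes in
*                               sequence, block on A
*   lock chain mutex B (usually
*   the freelist), see the
*   allrecord lock taken, wait
*   on allrecord_mutex -> stuck
*/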
*pret = 0;
return true;
}
/*
* Check if someone has the allrecord lock: queue if so.
*/
allrecord_ok = false;
if (m->allrecord_lock == F_UNLCK) {
/*
* allrecord lock not taken
*/
allrecord_ok = true;
}
if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
/*
* allrecord shared lock taken, but we only want to read
*/
allrecord_ok = true;
}
if (allrecord_ok) {
*pret = 0;
return true;
}
ret = pthread_mutex_unlock(chain);
if (ret != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
"(chain_mutex) failed: %s\n", strerror(ret)));
errno = ret;
goto fail;
}
ret = allrecord_mutex_lock(m, waitflag);
if (ret == EBUSY) {
ret = EAGAIN;
}
if (ret != 0) {
if (waitflag || (ret != EAGAIN)) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
"(allrecord_mutex) failed: %s\n",
waitflag ? "" : "try_", strerror(ret)));
}
errno = ret;
goto fail;
}
ret = pthread_mutex_unlock(&m->allrecord_mutex);
if (ret != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
"(allrecord_mutex) failed: %s\n", strerror(ret)));
errno = ret;
goto fail;
}
goto again;
fail:
*pret = -1;
return true;
}
bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
int *pret)
{
struct tdb_mutexes *m = tdb->mutexes;
pthread_mutex_t *chain;
int ret;
unsigned idx;
if (!tdb_mutex_index(tdb, off, len, &idx)) {
return false;
}
chain = &m->hashchains[idx];
ret = pthread_mutex_unlock(chain);
if (ret == 0) {
*pret = 0;
return true;
}
errno = ret;
*pret = -1;
return true;
}
int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
enum tdb_lock_flags flags)
{
struct tdb_mutexes *m = tdb->mutexes;
int ret;
uint32_t i;
bool waitflag = (flags & TDB_LOCK_WAIT);
int saved_errno;
if (tdb->flags & TDB_NOLOCK) {
return 0;
}
if (flags & TDB_LOCK_MARK_ONLY) {
return 0;
}
ret = allrecord_mutex_lock(m, waitflag);
if (!waitflag && (ret == EBUSY)) {
errno = EAGAIN;
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
if (ret != 0) {
if (!(flags & TDB_LOCK_PROBE)) {
TDB_LOG((tdb, TDB_DEBUG_TRACE,
"allrecord_mutex_lock() failed: %s\n",
strerror(ret)));
}
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
if (m->allrecord_lock != F_UNLCK) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
(int)m->allrecord_lock));
goto fail_unlock_allrecord_mutex;
}
m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;
for (i=0; i<tdb->hash_size; i++) {
/* ignore hashchains[0], the freelist */
pthread_mutex_t *chain = &m->hashchains[i+1];
ret = chain_mutex_lock(chain, waitflag);
if (!waitflag && (ret == EBUSY)) {
errno = EAGAIN;
goto fail_unroll_allrecord_lock;
}
if (ret != 0) {
if (!(flags & TDB_LOCK_PROBE)) {
TDB_LOG((tdb, TDB_DEBUG_TRACE,
"chain_mutex_lock() failed: %s\n",
strerror(ret)));
}
errno = ret;
goto fail_unroll_allrecord_lock;
}
ret = pthread_mutex_unlock(chain);
if (ret != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
"(chainlock) failed: %s\n", strerror(ret)));
errno = ret;
goto fail_unroll_allrecord_lock;
}
}
/*
* We leave this routine with m->allrecord_mutex locked
*/
return 0;
fail_unroll_allrecord_lock:
m->allrecord_lock = F_UNLCK;
fail_unlock_allrecord_mutex:
saved_errno = errno;
ret = pthread_mutex_unlock(&m->allrecord_mutex);
if (ret != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
"(allrecord_mutex) failed: %s\n", strerror(ret)));
}
errno = saved_errno;
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
struct tdb_mutexes *m = tdb->mutexes;
int ret;
uint32_t i;
if (tdb->flags & TDB_NOLOCK) {
return 0;
}
/*
* Our only caller tdb_allrecord_upgrade()
* guarantees that we already own the allrecord lock.
*
* Which means m->allrecord_mutex is still locked by us.
*/
if (m->allrecord_lock != F_RDLCK) {
tdb->ecode = TDB_ERR_LOCK;
TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
(int)m->allrecord_lock));
return -1;
}
m->allrecord_lock = F_WRLCK;
for (i=0; i<tdb->hash_size; i++) {
/* ignore hashchains[0], the freelist */
pthread_mutex_t *chain = &m->hashchains[i+1];
ret = chain_mutex_lock(chain, true);
if (ret != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
"(chainlock) failed: %s\n", strerror(ret)));
goto fail_unroll_allrecord_lock;
}
ret = pthread_mutex_unlock(chain);
if (ret != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
"(chainlock) failed: %s\n", strerror(ret)));
goto fail_unroll_allrecord_lock;
}
}
return 0;
fail_unroll_allrecord_lock:
m->allrecord_lock = F_RDLCK;
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
struct tdb_mutexes *m = tdb->mutexes;
/*
* Our only caller tdb_allrecord_upgrade() (in the error case)
* guarantees that we already own the allrecord lock.
*
* Which means m->allrecord_mutex is still locked by us.
*/
if (m->allrecord_lock != F_WRLCK) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
(int)m->allrecord_lock));
return;
}
m->allrecord_lock = F_RDLCK;
return;
}
int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
struct tdb_mutexes *m = tdb->mutexes;
short old;
int ret;
if (tdb->flags & TDB_NOLOCK) {
return 0;
}
/*
* Our only callers tdb_allrecord_unlock() and
* tdb_allrecord_lock() (in the error path)
* guarantee that we already own the allrecord lock.
*
* Which means m->allrecord_mutex is still locked by us.
*/
if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
(int)m->allrecord_lock));
return -1;
}
old = m->allrecord_lock;
m->allrecord_lock = F_UNLCK;
ret = pthread_mutex_unlock(&m->allrecord_mutex);
if (ret != 0) {
m->allrecord_lock = old;
TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
"(allrecord_mutex) failed: %s\n", strerror(ret)));
return -1;
}
return 0;
}
int tdb_mutex_init(struct tdb_context *tdb)
{
struct tdb_mutexes *m;
pthread_mutexattr_t ma;
int i, ret;
ret = tdb_mutex_mmap(tdb);
if (ret == -1) {
return -1;
}
m = tdb->mutexes;
ret = pthread_mutexattr_init(&ma);
if (ret != 0) {
goto fail_munmap;
}
ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
if (ret != 0) {
goto fail;
}
ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
if (ret != 0) {
goto fail;
}
ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
if (ret != 0) {
goto fail;
}
for (i=0; i<tdb->hash_size+1; i++) {
pthread_mutex_t *chain = &m->hashchains[i];
ret = pthread_mutex_init(chain, &ma);
if (ret != 0) {
goto fail;
}
}
m->allrecord_lock = F_UNLCK;
ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
if (ret != 0) {
goto fail;
}
ret = 0;
fail:
pthread_mutexattr_destroy(&ma);
fail_munmap:
if (ret == 0) {
return 0;
}
tdb_mutex_munmap(tdb);
errno = ret;
return -1;
}
int tdb_mutex_mmap(struct tdb_context *tdb)
{
size_t len;
void *ptr;
len = tdb_mutex_size(tdb);
if (len == 0) {
return 0;
}
if (tdb->mutexes != NULL) {
return 0;
}
ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
tdb->fd, 0);
if (ptr == MAP_FAILED) {
return -1;
}
tdb->mutexes = (struct tdb_mutexes *)ptr;
return 0;
}
int tdb_mutex_munmap(struct tdb_context *tdb)
{
size_t len;
int ret;
len = tdb_mutex_size(tdb);
if (len == 0) {
return 0;
}
ret = munmap(tdb->mutexes, len);
if (ret == -1) {
return -1;
}
tdb->mutexes = NULL;
return 0;
}
static bool tdb_mutex_locking_cached;
static bool tdb_mutex_locking_supported(void)
{
pthread_mutexattr_t ma;
pthread_mutex_t m;
int ret;
static bool initialized;
if (initialized) {
return tdb_mutex_locking_cached;
}
initialized = true;
ret = pthread_mutexattr_init(&ma);
if (ret != 0) {
return false;
}
ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
if (ret != 0) {
goto cleanup_ma;
}
ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
if (ret != 0) {
goto cleanup_ma;
}
ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
if (ret != 0) {
goto cleanup_ma;
}
ret = pthread_mutex_init(&m, &ma);
if (ret != 0) {
goto cleanup_ma;
}
ret = pthread_mutex_lock(&m);
if (ret != 0) {
goto cleanup_m;
}
/*
* This makes sure we have real mutexes
* from a threading library instead of just
* stubs from libc.
*/
ret = pthread_mutex_lock(&m);
if (ret != EDEADLK) {
goto cleanup_lock;
}
ret = pthread_mutex_unlock(&m);
if (ret != 0) {
goto cleanup_m;
}
tdb_mutex_locking_cached = true;
goto cleanup_m;
cleanup_lock:
pthread_mutex_unlock(&m);
cleanup_m:
pthread_mutex_destroy(&m);
cleanup_ma:
pthread_mutexattr_destroy(&ma);
return tdb_mutex_locking_cached;
}
static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;
static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
struct sigaction act;
struct sigaction oldact;
memset(&act, '\0', sizeof(act));
act.sa_handler = handler;
#ifdef SA_RESTART
act.sa_flags = SA_RESTART;
#endif
sigemptyset(&act.sa_mask);
sigaddset(&act.sa_mask, SIGCHLD);
sigaction(SIGCHLD, &act, &oldact);
if (p_old_handler) {
*p_old_handler = oldact.sa_handler;
}
return true;
#else /* !HAVE_SIGACTION */
return false;
#endif
}
static void tdb_robust_mutex_handler(int sig)
{
pid_t child_pid = tdb_robust_mutex_pid;
if (child_pid != -1) {
pid_t pid;
pid = waitpid(child_pid, NULL, WNOHANG);
if (pid == -1) {
switch (errno) {
case ECHILD:
tdb_robust_mutex_pid = -1;
return;
default:
return;
}
}
if (pid == child_pid) {
tdb_robust_mutex_pid = -1;
return;
}
}
if (tdb_robust_mutext_old_handler == SIG_DFL) {
return;
}
if (tdb_robust_mutext_old_handler == SIG_IGN) {
return;
}
if (tdb_robust_mutext_old_handler == SIG_ERR) {
return;
}
tdb_robust_mutext_old_handler(sig);
}
static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
{
int options = WNOHANG;
if (*child_pid == -1) {
return;
}
while (tdb_robust_mutex_pid > 0) {
pid_t pid;
/*
* First we try with WNOHANG, as the process might not exist
* anymore. Once we've sent SIGKILL we block waiting for the
* exit.
*/
pid = waitpid(*child_pid, NULL, options);
if (pid == -1) {
if (errno == EINTR) {
continue;
} else if (errno == ECHILD) {
break;
} else {
abort();
}
}
if (pid == *child_pid) {
break;
}
kill(*child_pid, SIGKILL);
options = 0;
}
tdb_robust_mutex_pid = -1;
*child_pid = -1;
}
_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
void *ptr = NULL;
pthread_mutex_t *m = NULL;
pthread_mutexattr_t ma;
int ret = 1;
int pipe_down[2] = { -1, -1 };
int pipe_up[2] = { -1, -1 };
ssize_t nread;
char c = 0;
bool ok;
static bool initialized;
pid_t saved_child_pid = -1;
bool cleanup_ma = false;
if (initialized) {
return tdb_mutex_locking_cached;
}
initialized = true;
ok = tdb_mutex_locking_supported();
if (!ok) {
return false;
}
tdb_mutex_locking_cached = false;
ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
if (ptr == MAP_FAILED) {
return false;
}
ret = pipe(pipe_down);
if (ret != 0) {
goto cleanup;
}
ret = pipe(pipe_up);
if (ret != 0) {
goto cleanup;
}
ret = pthread_mutexattr_init(&ma);
if (ret != 0) {
goto cleanup;
}
cleanup_ma = true;
ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
if (ret != 0) {
goto cleanup;
}
ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
if (ret != 0) {
goto cleanup;
}
ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
if (ret != 0) {
goto cleanup;
}
ret = pthread_mutex_init(ptr, &ma);
if (ret != 0) {
goto cleanup;
}
m = (pthread_mutex_t *)ptr;
if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
&tdb_robust_mutext_old_handler) == false) {
goto cleanup;
}
tdb_robust_mutex_pid = fork();
saved_child_pid = tdb_robust_mutex_pid;
if (tdb_robust_mutex_pid == 0) {
ssize_t nwritten;
close(pipe_down[1]);
close(pipe_up[0]);
ret = pthread_mutex_lock(m);
nwritten = write(pipe_up[1], &ret, sizeof(ret));
if (nwritten != sizeof(ret)) {
_exit(1);
}
if (ret != 0) {
_exit(1);
}
nread = read(pipe_down[0], &c, 1);
if (nread != 1) {
_exit(1);
}
/* leave locked */
_exit(0);
}
if (tdb_robust_mutex_pid == -1) {
goto cleanup;
}
close(pipe_down[0]);
pipe_down[0] = -1;
close(pipe_up[1]);
pipe_up[1] = -1;
nread = read(pipe_up[0], &ret, sizeof(ret));
if (nread != sizeof(ret)) {
goto cleanup;
}
ret = pthread_mutex_trylock(m);
if (ret != EBUSY) {
if (ret == 0) {
pthread_mutex_unlock(m);
}
goto cleanup;
}
if (write(pipe_down[1], &c, 1) != 1) {
goto cleanup;
}
nread = read(pipe_up[0], &c, 1);
if (nread != 0) {
goto cleanup;
}
tdb_robust_mutex_wait_for_child(&saved_child_pid);
ret = pthread_mutex_trylock(m);
if (ret != EOWNERDEAD) {
if (ret == 0) {
pthread_mutex_unlock(m);
}
goto cleanup;
}
ret = pthread_mutex_consistent(m);
if (ret != 0) {
goto cleanup;
}
ret = pthread_mutex_trylock(m);
if (ret != EDEADLK && ret != EBUSY) {
pthread_mutex_unlock(m);
goto cleanup;
}
ret = pthread_mutex_unlock(m);
if (ret != 0) {
goto cleanup;
}
tdb_mutex_locking_cached = true;
cleanup:
/*
* Note that we don't reset the signal handler; we just reset
* tdb_robust_mutex_pid to -1. This is ok as this code path is only
* called once per process.
*
* Leaving our signal handler in place avoids races with other threads
* potentially setting up their own SIGCHLD handlers.
*
* The worst thing that can happen is that another, newer signal
* handler will get the SIGCHLD signal for our child and/or reap the
* child with a wait() function. tdb_robust_mutex_wait_for_child()
* handles the case where waitpid returns ECHILD.
*/
tdb_robust_mutex_wait_for_child(&saved_child_pid);
if (m != NULL) {
pthread_mutex_destroy(m);
}
if (cleanup_ma) {
pthread_mutexattr_destroy(&ma);
}
if (pipe_down[0] != -1) {
close(pipe_down[0]);
}
if (pipe_down[1] != -1) {
close(pipe_down[1]);
}
if (pipe_up[0] != -1) {
close(pipe_up[0]);
}
if (pipe_up[1] != -1) {
close(pipe_up[1]);
}
if (ptr != NULL) {
munmap(ptr, sizeof(pthread_mutex_t));
}
return tdb_mutex_locking_cached;
}
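/*
* Illustrative caller-side sketch (a typical-usage assumption, not
* part of this file): callers gate mutex locking on this runtime
* check, e.g.:
*
*   int tdb_flags = TDB_CLEAR_IF_FIRST;
*
*   if (tdb_runtime_check_for_robust_mutexes()) {
*       tdb_flags |= TDB_MUTEX_LOCKING;
*   }
*   tdb = tdb_open(path, hash_size, tdb_flags, O_RDWR|O_CREAT, 0600);
*/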
#else
size_t tdb_mutex_size(struct tdb_context *tdb)
{
return 0;
}
bool tdb_have_mutexes(struct tdb_context *tdb)
{
return false;
}
int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
enum tdb_lock_flags flags)
{
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
return -1;
}
int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
return;
}
int tdb_mutex_mmap(struct tdb_context *tdb)
{
errno = ENOSYS;
return -1;
}
int tdb_mutex_munmap(struct tdb_context *tdb)
{
errno = ENOSYS;
return -1;
}
int tdb_mutex_init(struct tdb_context *tdb)
{
errno = ENOSYS;
return -1;
}
_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
return false;
}
#endif