2006-07-11 22:01:26 +04:00
/*
Unix SMB / CIFS implementation .
trivial database library
Copyright ( C ) Andrew Tridgell 1999 - 2005
Copyright ( C ) Paul ` Rusty ' Russell 2000
Copyright ( C ) Jeremy Allison 2000 - 2003
* * NOTE ! The following LGPL license applies to the tdb
* * library . This does NOT imply that all of Samba is released
* * under the LGPL
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
# include "tdb_private.h"
#ifndef HAVE_PREAD
/* Stand-in for pread() on platforms that lack it: seek to "offset"
   and then do an ordinary read().  Note that this moves the file
   position of "fd".  A failed (or misplaced) seek is reported as
   -1 with errno set to EIO. */
static ssize_t pread(int fd, void *buf, size_t count, off_t offset)
{
	off_t where = lseek(fd, offset, SEEK_SET);

	if (where == offset) {
		return read(fd, buf, count);
	}

	errno = EIO;
	return -1;
}
#endif
#ifndef HAVE_PWRITE
/* Stand-in for pwrite() on platforms that lack it: seek to "offset"
   and then do an ordinary write().  Note that this moves the file
   position of "fd".  A failed (or misplaced) seek is reported as
   -1 with errno set to EIO. */
static ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset)
{
	off_t where = lseek(fd, offset, SEEK_SET);

	if (where == offset) {
		return write(fd, buf, count);
	}

	errno = EIO;
	return -1;
}
#endif
/* Bounds check: make sure the database is at least "len" bytes long.

   If "len" is beyond the size we currently know about, look at the
   file itself: when it has been grown (by someone else) far enough,
   the mapping is dropped, map_size is updated from the file size and
   the file is mapped again.

   "len" is the minimum length needed for the db.  When "probe" is
   non-zero a failure is not logged.

   Returns 0 if "len" is in bounds, otherwise the result of
   TDB_ERRCODE(TDB_ERR_IO, -1).
*/
static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
{
	struct stat sb;

	/* common case: already covered by the size we know about */
	if (tdb->map_size >= len) {
		return 0;
	}

	if (tdb->flags & TDB_INTERNAL) {
		/* no file behind an internal db, so nothing to re-check */
		if (probe == 0) {
			/* the log function may look at ecode, set it first */
			tdb->ecode = TDB_ERR_IO;
			TDB_LOG((tdb, 0, " tdb_oob len %d beyond internal malloc size %d \n ",
				 (int)len, (int)tdb->map_size));
		}
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}

	if (fstat(tdb->fd, &sb) == -1) {
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}

	if (sb.st_size < (size_t)len) {
		/* the file really is too short */
		if (probe == 0) {
			/* the log function may look at ecode, set it first */
			tdb->ecode = TDB_ERR_IO;
			TDB_LOG((tdb, 0, " tdb_oob len %d beyond eof at %d \n ",
				 (int)len, (int)sb.st_size));
		}
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}

	/* the file is bigger than we thought: unmap, adopt the new
	   size, then map again */
	if (tdb_munmap(tdb) == -1) {
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}
	tdb->map_size = sb.st_size;
	tdb_mmap(tdb);
	return 0;
}
/* write a lump of data at a specified offset

   Copies "len" bytes from "buf" into the database at offset "off":
   straight into the mapping when there is one, with pwrite() on the
   file descriptor otherwise.  A zero length write succeeds without
   doing anything.

   Returns 0 on success and -1 on failure:
     - the database is read-only or inside a read traverse (ecode is
       set to TDB_ERR_RDONLY)
     - the range off..off+len is out of bounds, or off+len wraps
     - pwrite() did not write exactly "len" bytes
*/
static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
		     const void *buf, tdb_len_t len)
{
	if (len == 0) {
		return 0;
	}

	if (tdb->read_only || tdb->traverse_read) {
		tdb->ecode = TDB_ERR_RDONLY;
		return -1;
	}

	/* off + len reaches tdb_oob as a tdb_off_t and can wrap around.
	   A wrapped sum is small, so it would pass the bounds check and
	   let the copy below run outside the mapping: reject it here */
	if ((tdb_off_t)(off + len) < off) {
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}

	if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
		return -1;

	if (tdb->map_ptr) {
		memcpy(off + (char *)tdb->map_ptr, buf, len);
	} else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
		/* Ensure ecode is set for log fn. */
		tdb->ecode = TDB_ERR_IO;
		TDB_LOG((tdb, 0, " tdb_write failed at %d len=%d (%s) \n ",
			 (int)off, (int)len, strerror(errno)));
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}
	return 0;
}
/* Endian conversion: we only ever deal with 4 byte quantities.
   Applies TDB_BYTEREV in place to each of the size/4 whole 32 bit
   words in "buf" (trailing bytes, if any, are left alone) and
   returns "buf". */
void *tdb_convert(void *buf, u32 size)
{
	u32 *word = (u32 *)buf;
	u32 *end = word + size / 4;

	while (word < end) {
		*word = TDB_BYTEREV(*word);
		word++;
	}
	return buf;
}
/* read a lump of data at a specified offset, maybe convert

   Copies "len" bytes from the database at offset "off" into "buf":
   out of the mapping when there is one, with pread() on the file
   descriptor otherwise.  If "cv" is non-zero the data is then run
   through tdb_convert() in place.

   Returns 0 on success and -1 on failure:
     - the range off..off+len is out of bounds, or off+len wraps
     - pread() did not return exactly "len" bytes
*/
static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
		    tdb_len_t len, int cv)
{
	/* off + len reaches tdb_oob as a tdb_off_t and can wrap around.
	   A wrapped sum is small, so it would pass the bounds check and
	   let the copy below read outside the mapping: reject it here */
	if ((tdb_off_t)(off + len) < off) {
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}

	if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
		return -1;
	}

	if (tdb->map_ptr) {
		memcpy(buf, off + (char *)tdb->map_ptr, len);
	} else {
		ssize_t ret = pread(tdb->fd, buf, len, off);
		if (ret != (ssize_t)len) {
			/* Ensure ecode is set for log fn. */
			tdb->ecode = TDB_ERR_IO;
			TDB_LOG((tdb, 0, " tdb_read failed at %d len=%d ret=%d (%s) map_size=%d \n ",
				 (int)off, (int)len, (int)ret, strerror(errno), (int)tdb->map_size));
			return TDB_ERRCODE(TDB_ERR_IO, -1);
		}
	}
	if (cv) {
		tdb_convert(buf, len);
	}
	return 0;
}
/*
  Starting at *chain, scan the hash table heads without taking any
  lock and stop at the first one that is non-zero (or at hash_size if
  there is none).  The index reached is stored back into *chain.  The
  value will then be confirmed with the lock held.
*/
static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
{
	u32 idx = *chain;

	if (tdb->map_ptr) {
		/* mapped: look at each head directly in memory */
		while (idx < tdb->header.hash_size) {
			unsigned char *head = TDB_HASH_TOP(idx) + (unsigned char *)tdb->map_ptr;

			if (*(u32 *)head != 0) {
				break;
			}
			idx++;
		}
	} else {
		/* not mapped: fetch each head through tdb_ofs_read; a
		   failed read ends the scan as well */
		u32 head = 0;

		while (idx < tdb->header.hash_size) {
			if (tdb_ofs_read(tdb, TDB_HASH_TOP(idx), &head) != 0) {
				break;
			}
			if (head != 0) {
				break;
			}
			idx++;
		}
	}

	*chain = idx;
}
int tdb_munmap ( struct tdb_context * tdb )
{
if ( tdb - > flags & TDB_INTERNAL )
return 0 ;
# ifdef HAVE_MMAP
if ( tdb - > map_ptr ) {
int ret = munmap ( tdb - > map_ptr , tdb - > map_size ) ;
if ( ret ! = 0 )
return ret ;
}
# endif
tdb - > map_ptr = NULL ;
return 0 ;
}
void tdb_mmap ( struct tdb_context * tdb )
{
if ( tdb - > flags & TDB_INTERNAL )
return ;
# ifdef HAVE_MMAP
if ( ! ( tdb - > flags & TDB_NOMMAP ) ) {
tdb - > map_ptr = mmap ( NULL , tdb - > map_size ,
PROT_READ | ( tdb - > read_only ? 0 : PROT_WRITE ) ,
MAP_SHARED | MAP_FILE , tdb - > fd , 0 ) ;
/*
* NB . When mmap fails it returns MAP_FAILED * NOT * NULL ! ! ! !
*/
if ( tdb - > map_ptr = = MAP_FAILED ) {
tdb - > map_ptr = NULL ;
TDB_LOG ( ( tdb , 2 , " tdb_mmap failed for size %d (%s) \n " ,
tdb - > map_size , strerror ( errno ) ) ) ;
}
} else {
tdb - > map_ptr = NULL ;
}
# else
tdb - > map_ptr = NULL ;
# endif
}
/* Grow the database file from "size" bytes by "addition" bytes.

   ftruncate is tried first, as that is what posix says to use for
   mmap expansion; if it fails, a single byte is written at the new
   last offset instead.  The added region is then filled with
   TDB_PAD_BYTE.

   Returns 0 on success, -1 on failure (ecode is set to
   TDB_ERR_RDONLY when the database is read-only or inside a read
   traverse).
*/
static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
{
	char pad[1024];

	if (tdb->read_only || tdb->traverse_read) {
		tdb->ecode = TDB_ERR_RDONLY;
		return -1;
	}

	if (ftruncate(tdb->fd, size+addition) == -1) {
		/* fall back to extending the file with a one byte write */
		char zero = 0;

		if (pwrite(tdb->fd, &zero, 1, (size+addition) - 1) != 1) {
			TDB_LOG((tdb, 0, " expand_file to %d failed (%s) \n ",
				 size+addition, strerror(errno)));
			return -1;
		}
	}

	/* Write real data over the whole new region so the file does
	   not end up sparse - that would be very bad if we later ran
	   out of disk.  This has to go through write, not the mmap. */
	memset(pad, TDB_PAD_BYTE, sizeof(pad));
	while (addition != 0) {
		int chunk;
		int written;

		if (addition > sizeof(pad)) {
			chunk = sizeof(pad);
		} else {
			chunk = addition;
		}

		written = pwrite(tdb->fd, pad, chunk, size);
		if (written != chunk) {
			TDB_LOG((tdb, 0, " expand_file write of %d failed (%s) \n ",
				 chunk, strerror(errno)));
			return -1;
		}

		size += chunk;
		addition -= chunk;
	}
	return 0;
}
/* Grow the database by at least "size" bytes and hand the new space
   to the free list.

   For a file backed database the file is expanded and mapped again;
   for a TDB_INTERNAL one the in-memory buffer is realloc'ed.  The
   whole operation runs under the write lock taken with
   tdb_lock(tdb, -1, F_WRLCK).

   Returns 0 on success, -1 on failure. */
int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
{
	struct list_struct rec;
	tdb_off_t offset;
	int ret = -1;

	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
		TDB_LOG((tdb, 0, " lock failed in tdb_expand \n "));
		return -1;
	}

	/* another process may already have expanded the file: probe
	   one byte past what we know so map_size gets refreshed (the
	   result itself is not needed) */
	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);

	/* always leave room for at least 10 more records, and round
	   the database up to a multiple of the page size */
	size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;

	if (!(tdb->flags & TDB_INTERNAL)) {
		/*
		 * The file has to be unmapped before it is written to,
		 * for consistency on systems like OpenBSD where writes
		 * and mmaps are not consistent.
		 */
		tdb_munmap(tdb);

		/* expand the file itself */
		if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) {
			goto done;
		}
	}

	tdb->map_size += size;

	if (!(tdb->flags & TDB_INTERNAL)) {
		/*
		 * Map again before the new space is put to use, for the
		 * same consistency reason as above.  A failed mmap is
		 * fine here: we then fall back to read/write.
		 */
		tdb_mmap(tdb);
	} else {
		char *grown = (char *)realloc(tdb->map_ptr, tdb->map_size);

		if (grown == NULL) {
			/* the old buffer is still valid, undo the size change */
			tdb->map_size -= size;
			goto done;
		}
		tdb->map_ptr = grown;
	}

	/* describe the whole new region as one free record ... */
	memset(&rec, 0, sizeof(rec));
	rec.rec_len = size - sizeof(rec);

	/* ... and link it into the free list */
	offset = tdb->map_size - size;
	if (tdb_free(tdb, offset, &rec) != -1) {
		ret = 0;
	}

done:
	tdb_unlock(tdb, -1, F_WRLCK);
	return ret;
}
/* read/write a tdb_off_t */

/* Fetch the tdb_off_t stored at "offset" into *d through the read
   method, asking for conversion when DOCONV() says so.  Returns
   whatever the read method returns. */
int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
{
	int convert = DOCONV();

	return tdb->methods->tdb_read(tdb, offset, (char *)d, sizeof(*d), convert);
}
/* Store the tdb_off_t *d at "offset" through the write method.
   CONVERT is applied to a local copy, so *d itself is never
   modified.  Returns whatever the write method returns. */
int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
{
	tdb_off_t value = *d;
	const void *data = CONVERT(value);

	return tdb->methods->tdb_write(tdb, offset, data, sizeof(value));
}
/* read a lump of data, allocating the space for it

   Allocates a buffer with malloc() and fills it with "len" bytes
   read from the database at "offset" (no conversion).  The caller
   owns the returned buffer.  For a zero "len" a one byte buffer is
   still allocated, but nothing is read into it.

   Returns the buffer, or NULL if the allocation failed (ecode
   TDB_ERR_OOM) or the read failed.
*/
char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
{
	char *buf;

	/* some systems don't like zero length malloc, so always ask
	   for at least one byte.  Only the allocation is padded: the
	   read below keeps the caller's length, otherwise a zero
	   length request would fetch a byte nobody asked for and
	   could fail the bounds check at the very end of the db */
	buf = (char *)malloc(len ? len : 1);
	if (!buf) {
		/* Ensure ecode is set for log fn. */
		tdb->ecode = TDB_ERR_OOM;
		TDB_LOG((tdb, 0, " tdb_alloc_read malloc failed len=%d (%s) \n ",
			 (int)len, strerror(errno)));
		return TDB_ERRCODE(TDB_ERR_OOM, buf);
	}

	if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
		SAFE_FREE(buf);
		return NULL;
	}
	return buf;
}
/* read/write a record */

/* Read the record header at "offset" into *rec (converted when
   DOCONV() says so) and sanity check it.

   Returns 0 on success and -1 on failure:
     - the read itself failed
     - the record has a bad magic (ecode TDB_ERR_CORRUPT)
     - a record header at rec->next would not fit inside the
       database (ecode TDB_ERR_IO)
*/
int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
{
	if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec), DOCONV()) == -1)
		return -1;

	if (TDB_BAD_MAGIC(rec)) {
		/* Ensure ecode is set for log fn. */
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, 0, " tdb_rec_read bad magic 0x%x at offset=%d \n ", rec->magic, offset));
		return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
	}

	/* rec->next + sizeof(*rec) reaches tdb_oob as a tdb_off_t.  A
	   garbage next pointer close to the top of the range would
	   wrap to a small value there and pass the bounds check, so
	   report it as out of bounds ourselves */
	if ((tdb_off_t)(rec->next + sizeof(*rec)) < rec->next) {
		return TDB_ERRCODE(TDB_ERR_IO, -1);
	}

	return tdb->methods->tdb_oob(tdb, rec->next + sizeof(*rec), 0);
}
/* Write the record header *rec at "offset" through the write method.
   CONVERT is applied to a local copy, so the caller's record is
   never modified.  Returns whatever the write method returns. */
int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
{
	struct list_struct copy = *rec;
	const void *data = CONVERT(copy);

	return tdb->methods->tdb_write(tdb, offset, data, sizeof(copy));
}
/* The default method table, installed by tdb_io_init().
   The initialisers are positional, so their order has to follow the
   member order of struct tdb_methods (declared outside this file -
   presumably in tdb_private.h, confirm there before reordering). */
static const struct tdb_methods io_methods = {
/* read "len" bytes at an offset, optionally converting them */
tdb_read ,
/* write "len" bytes at an offset */
tdb_write ,
/* unlocked scan for the next non-zero hash chain head */
tdb_next_hash_chain ,
/* bounds check, remapping if the file has grown */
tdb_oob ,
/* grow the underlying file and fill the new space */
tdb_expand_file ,
/* not defined in this file; judging by the name, the byte range
   locking primitive - confirm against its definition */
tdb_brlock
} ;
/*
initialise the default methods table

Points tdb->methods at the file-local io_methods table, so that
calls made through tdb->methods use the functions listed there.
*/
void tdb_io_init ( struct tdb_context * tdb )
{
tdb - > methods = & io_methods ;
}