2010-04-06 15:14:15 -07:00
# include <linux/ceph/ceph_debug.h>
2010-08-02 15:34:23 -07:00
# include <linux/file.h>
# include <linux/namei.h>
2014-03-09 23:16:40 +08:00
# include <linux/random.h>
2010-08-02 15:34:23 -07:00
# include "super.h"
# include "mds_client.h"
2010-04-06 15:14:15 -07:00
# include <linux/ceph/pagelist.h>
2010-08-02 15:34:23 -07:00
2014-03-09 23:16:40 +08:00
/*
 * Random value filled in by ceph_flock_init(); secure_addr() XORs it with
 * lock-owner pointers to build the owner id placed in lock requests.
 */
static u64 lock_secret ;
2014-10-14 10:33:35 +08:00
/*
 * Forward declaration; the definition appears later in this file.
 * ceph_lock_message() installs this function as req->r_wait_for_completion
 * when a lock request is allowed to block.
 */
static int ceph_lock_wait_for_completion ( struct ceph_mds_client * mdsc ,
struct ceph_mds_request * req ) ;
2014-03-09 23:16:40 +08:00
static inline u64 secure_addr ( void * addr )
{
u64 v = lock_secret ^ ( u64 ) ( unsigned long ) addr ;
/*
* Set the most significant bit , so that MDS knows the ' owner '
* is sufficient to identify the owner of lock . ( old code uses
* both ' owner ' and ' pid ' )
*/
v | = ( 1ULL < < 63 ) ;
return v ;
}
/*
 * Fill lock_secret with random bytes.  secure_addr() mixes this value
 * into every owner id it produces.
 */
void __init ceph_flock_init(void)
{
	get_random_bytes(&lock_secret, sizeof lock_secret);
}
2010-08-02 15:34:23 -07:00
/*
 * Build and send one file-lock request (fcntl or flock flavour) to the MDS.
 *
 * @lock_type: lock rule stored in the request (callers in this file pass
 *             CEPH_LOCK_FCNTL or CEPH_LOCK_FLOCK)
 * @operation: MDS operation, CEPH_MDS_OP_SETFILELOCK or
 *             CEPH_MDS_OP_GETFILELOCK
 * @file:      file whose inode the request is issued against
 * @cmd:       Ceph lock type stored in the request (CEPH_LOCK_SHARED,
 *             CEPH_LOCK_EXCL or CEPH_LOCK_UNLOCK)
 * @wait:      non-zero to request a blocking lock; forced to 0 unless
 *             @operation is SETFILELOCK and @cmd is not CEPH_LOCK_UNLOCK
 * @fl:        lock description; supplies owner, pid and range.  After a
 *             successful GETFILELOCK it is overwritten with the pid, type
 *             and range carried in the reply.
 *
 * Returns 0 on success, PTR_ERR() of a failed request allocation, or the
 * error returned by ceph_mdsc_do_request().
 */
static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
			     int cmd, u8 wait, struct file_lock *fl)
{
	struct inode *inode = file_inode(file);
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
	struct ceph_mds_request *req;
	int err;
	u64 length = 0;
	u64 owner;

	/* Only a SETFILELOCK that actually acquires a lock may block. */
	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
		wait = 0;

	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* The request keeps its own inode reference. */
	req->r_inode = inode;
	ihold(inode);
	req->r_num_caps = 1;

	/* mds requires start and length rather than start and end */
	if (LLONG_MAX == fl->fl_end)
		length = 0;
	else
		length = fl->fl_end - fl->fl_start + 1;

	owner = secure_addr(fl->fl_owner);

	dout(" ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
	     " start: %llu, length: %llu, wait: %d, type: %d ", (int)lock_type,
	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
	     wait, fl->fl_type);

	req->r_args.filelock_change.rule = lock_type;
	req->r_args.filelock_change.type = cmd;
	req->r_args.filelock_change.owner = cpu_to_le64(owner);
	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
	req->r_args.filelock_change.length = cpu_to_le64(length);
	req->r_args.filelock_change.wait = wait;
	/* Blocking requests wait through the interruptible helper. */
	if (wait)
		req->r_wait_for_completion = ceph_lock_wait_for_completion;

	err = ceph_mdsc_do_request(mdsc, inode, req);

	/*
	 * Only look at the reply when the request succeeded: on failure the
	 * filelock_reply pointer must not be dereferenced and the caller's
	 * @fl must be left untouched.
	 */
	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_RDLCK;
		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_WRLCK;
		else
			fl->fl_type = F_UNLCK;

		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
		/* From here on 'length' holds start + length, i.e. end + 1. */
		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
			 le64_to_cpu(req->r_reply_info.filelock_reply->length);
		/*
		 * NOTE(review): a reply length of 0 yields fl_end = start - 1
		 * here, while the request side above uses length 0 for a
		 * range ending at LLONG_MAX -- confirm this is intended.
		 */
		if (length >= 1)
			fl->fl_end = length - 1;
		else
			fl->fl_end = 0;
	}
	ceph_mdsc_put_request(req);
	dout(" ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
	     " length: %llu, wait: %d, type: %d, err code %d ", (int)lock_type,
	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
	     length, wait, fl->fl_type, err);
	return err;
}
2014-10-14 10:33:35 +08:00
static int ceph_lock_wait_for_completion ( struct ceph_mds_client * mdsc ,
struct ceph_mds_request * req )
{
struct ceph_mds_request * intr_req ;
struct inode * inode = req - > r_inode ;
int err , lock_type ;
BUG_ON ( req - > r_op ! = CEPH_MDS_OP_SETFILELOCK ) ;
if ( req - > r_args . filelock_change . rule = = CEPH_LOCK_FCNTL )
lock_type = CEPH_LOCK_FCNTL_INTR ;
else if ( req - > r_args . filelock_change . rule = = CEPH_LOCK_FLOCK )
lock_type = CEPH_LOCK_FLOCK_INTR ;
else
BUG_ON ( 1 ) ;
BUG_ON ( req - > r_args . filelock_change . type = = CEPH_LOCK_UNLOCK ) ;
err = wait_for_completion_interruptible ( & req - > r_completion ) ;
if ( ! err )
return 0 ;
dout ( " ceph_lock_wait_for_completion: request %llu was interrupted \n " ,
req - > r_tid ) ;
intr_req = ceph_mdsc_create_request ( mdsc , CEPH_MDS_OP_SETFILELOCK ,
USE_AUTH_MDS ) ;
if ( IS_ERR ( intr_req ) )
return PTR_ERR ( intr_req ) ;
intr_req - > r_inode = inode ;
ihold ( inode ) ;
intr_req - > r_num_caps = 1 ;
intr_req - > r_args . filelock_change = req - > r_args . filelock_change ;
intr_req - > r_args . filelock_change . rule = lock_type ;
intr_req - > r_args . filelock_change . type = CEPH_LOCK_UNLOCK ;
err = ceph_mdsc_do_request ( mdsc , inode , intr_req ) ;
ceph_mdsc_put_request ( intr_req ) ;
if ( err & & err ! = - ERESTARTSYS )
return err ;
wait_for_completion ( & req - > r_completion ) ;
return 0 ;
}
2010-08-02 15:34:23 -07:00
/**
* Attempt to set an fcntl lock .
* For now , this just goes away to the server . Later it may be more awesome .
*/
int ceph_lock ( struct file * file , int cmd , struct file_lock * fl )
{
u8 lock_cmd ;
int err ;
u8 wait = 0 ;
u16 op = CEPH_MDS_OP_SETFILELOCK ;
2014-03-04 15:50:06 +08:00
if ( ! ( fl - > fl_flags & FL_POSIX ) )
return - ENOLCK ;
/* No mandatory locks */
if ( __mandatory_lock ( file - > f_mapping - > host ) & & fl - > fl_type ! = F_UNLCK )
return - ENOLCK ;
2014-03-09 23:16:40 +08:00
dout ( " ceph_lock, fl_owner: %p " , fl - > fl_owner ) ;
2010-08-02 15:34:23 -07:00
/* set wait bit as appropriate, then make command as Ceph expects it*/
2014-03-04 15:42:24 +08:00
if ( IS_GETLK ( cmd ) )
2010-08-02 15:34:23 -07:00
op = CEPH_MDS_OP_GETFILELOCK ;
2014-03-04 15:42:24 +08:00
else if ( IS_SETLKW ( cmd ) )
wait = 1 ;
2010-08-02 15:34:23 -07:00
if ( F_RDLCK = = fl - > fl_type )
lock_cmd = CEPH_LOCK_SHARED ;
else if ( F_WRLCK = = fl - > fl_type )
lock_cmd = CEPH_LOCK_EXCL ;
else
lock_cmd = CEPH_LOCK_UNLOCK ;
2010-11-23 13:42:23 -08:00
err = ceph_lock_message ( CEPH_LOCK_FCNTL , op , file , lock_cmd , wait , fl ) ;
2010-08-02 15:34:23 -07:00
if ( ! err ) {
2014-03-09 23:16:40 +08:00
if ( op ! = CEPH_MDS_OP_GETFILELOCK ) {
2010-11-23 13:58:29 -08:00
dout ( " mds locked, locking locally " ) ;
err = posix_lock_file ( file , fl , NULL ) ;
if ( err & & ( CEPH_MDS_OP_SETFILELOCK = = op ) ) {
2011-05-25 14:56:12 -07:00
/* undo! This should only happen if
* the kernel detects local
* deadlock . */
2010-11-23 13:58:29 -08:00
ceph_lock_message ( CEPH_LOCK_FCNTL , op , file ,
CEPH_LOCK_UNLOCK , 0 , fl ) ;
2011-05-25 14:56:12 -07:00
dout ( " got %d on posix_lock_file, undid lock " ,
err ) ;
2010-11-23 13:58:29 -08:00
}
2010-08-02 15:34:23 -07:00
}
}
return err ;
}
int ceph_flock ( struct file * file , int cmd , struct file_lock * fl )
{
u8 lock_cmd ;
int err ;
2014-03-04 15:42:24 +08:00
u8 wait = 0 ;
2010-08-02 15:34:23 -07:00
2014-03-04 15:50:06 +08:00
if ( ! ( fl - > fl_flags & FL_FLOCK ) )
return - ENOLCK ;
/* No mandatory locks */
if ( __mandatory_lock ( file - > f_mapping - > host ) & & fl - > fl_type ! = F_UNLCK )
return - ENOLCK ;
2014-03-09 23:16:40 +08:00
dout ( " ceph_flock, fl_file: %p " , fl - > fl_file ) ;
2010-08-02 15:34:23 -07:00
2014-03-04 15:42:24 +08:00
if ( IS_SETLKW ( cmd ) )
wait = 1 ;
if ( F_RDLCK = = fl - > fl_type )
2010-08-02 15:34:23 -07:00
lock_cmd = CEPH_LOCK_SHARED ;
2014-03-04 15:42:24 +08:00
else if ( F_WRLCK = = fl - > fl_type )
2010-08-02 15:34:23 -07:00
lock_cmd = CEPH_LOCK_EXCL ;
else
lock_cmd = CEPH_LOCK_UNLOCK ;
err = ceph_lock_message ( CEPH_LOCK_FLOCK , CEPH_MDS_OP_SETFILELOCK ,
2010-11-23 13:42:23 -08:00
file , lock_cmd , wait , fl ) ;
2010-08-02 15:34:23 -07:00
if ( ! err ) {
2015-10-22 13:38:14 -04:00
err = locks_lock_file_wait ( file , fl ) ;
2010-08-02 15:34:23 -07:00
if ( err ) {
ceph_lock_message ( CEPH_LOCK_FLOCK ,
CEPH_MDS_OP_SETFILELOCK ,
2010-11-23 13:42:23 -08:00
file , CEPH_LOCK_UNLOCK , 0 , fl ) ;
2015-10-22 13:38:14 -04:00
dout ( " got %d on locks_lock_file_wait, undid lock " , err ) ;
2010-08-02 15:34:23 -07:00
}
}
return err ;
}
2015-01-16 15:05:55 -05:00
/*
* Fills in the passed counter variables , so you can prepare pagelist metadata
* before calling ceph_encode_locks .
2010-08-02 15:34:23 -07:00
*/
void ceph_count_locks ( struct inode * inode , int * fcntl_count , int * flock_count )
{
2015-02-16 14:32:03 -05:00
struct file_lock * lock ;
2015-01-16 15:05:55 -05:00
struct file_lock_context * ctx ;
2010-08-02 15:34:23 -07:00
* fcntl_count = 0 ;
* flock_count = 0 ;
2015-01-16 15:05:55 -05:00
ctx = inode - > i_flctx ;
if ( ctx ) {
2015-02-16 14:32:03 -05:00
spin_lock ( & ctx - > flc_lock ) ;
list_for_each_entry ( lock , & ctx - > flc_posix , fl_list )
+ + ( * fcntl_count ) ;
list_for_each_entry ( lock , & ctx - > flc_flock , fl_list )
+ + ( * flock_count ) ;
spin_unlock ( & ctx - > flc_lock ) ;
2010-08-02 15:34:23 -07:00
}
dout ( " counted %d flock locks and %d fcntl locks " ,
* flock_count , * fcntl_count ) ;
}
/**
2013-05-15 13:03:35 -05:00
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
2013-06-21 08:58:15 -04:00
* array . Must be called with inode - > i_lock already held .
2013-05-15 13:03:35 -05:00
* If we encounter more of a specific lock type than expected , return - ENOSPC .
2010-08-02 15:34:23 -07:00
*/
2013-05-15 13:03:35 -05:00
int ceph_encode_locks_to_buffer ( struct inode * inode ,
struct ceph_filelock * flocks ,
int num_fcntl_locks , int num_flock_locks )
2010-08-02 15:34:23 -07:00
{
struct file_lock * lock ;
2015-01-16 15:05:55 -05:00
struct file_lock_context * ctx = inode - > i_flctx ;
2010-08-02 15:34:23 -07:00
int err = 0 ;
2010-09-17 10:24:02 -07:00
int seen_fcntl = 0 ;
int seen_flock = 0 ;
2013-05-15 13:03:35 -05:00
int l = 0 ;
2010-08-02 15:34:23 -07:00
dout ( " encoding %d flock and %d fcntl locks " , num_flock_locks ,
num_fcntl_locks ) ;
2013-05-15 13:03:35 -05:00
2015-01-16 15:05:55 -05:00
if ( ! ctx )
return 0 ;
2015-01-16 15:05:57 -05:00
spin_lock ( & ctx - > flc_lock ) ;
2015-07-07 16:18:46 +08:00
list_for_each_entry ( lock , & ctx - > flc_posix , fl_list ) {
2015-01-16 15:05:55 -05:00
+ + seen_fcntl ;
if ( seen_fcntl > num_fcntl_locks ) {
err = - ENOSPC ;
goto fail ;
2010-08-02 15:34:23 -07:00
}
2015-01-16 15:05:55 -05:00
err = lock_to_ceph_filelock ( lock , & flocks [ l ] ) ;
if ( err )
goto fail ;
+ + l ;
2010-08-02 15:34:23 -07:00
}
2015-01-16 15:05:55 -05:00
list_for_each_entry ( lock , & ctx - > flc_flock , fl_list ) {
+ + seen_flock ;
if ( seen_flock > num_flock_locks ) {
err = - ENOSPC ;
goto fail ;
2010-08-02 15:34:23 -07:00
}
2015-01-16 15:05:55 -05:00
err = lock_to_ceph_filelock ( lock , & flocks [ l ] ) ;
if ( err )
goto fail ;
+ + l ;
2010-08-02 15:34:23 -07:00
}
fail :
2015-01-16 15:05:57 -05:00
spin_unlock ( & ctx - > flc_lock ) ;
2010-08-02 15:34:23 -07:00
return err ;
}
2013-05-15 13:03:35 -05:00
/*
 * Append already-encoded locks to @pagelist.
 *
 * Layout written, in order:
 *   __le32 number of fcntl locks, the fcntl lock entries,
 *   __le32 number of flock locks, the flock lock entries.
 * The flock entries are read from @flocks starting at index
 * @num_fcntl_locks.
 *
 * Returns zero on success, or the first error reported by
 * ceph_pagelist_append().
 */
int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
			   struct ceph_pagelist *pagelist,
			   int num_fcntl_locks, int num_flock_locks)
{
	__le32 count;
	int ret;

	count = cpu_to_le32(num_fcntl_locks);
	ret = ceph_pagelist_append(pagelist, &count, sizeof(count));
	if (ret)
		return ret;

	ret = ceph_pagelist_append(pagelist, flocks,
				   num_fcntl_locks * sizeof(*flocks));
	if (ret)
		return ret;

	count = cpu_to_le32(num_flock_locks);
	ret = ceph_pagelist_append(pagelist, &count, sizeof(count));
	if (ret)
		return ret;

	return ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
				    num_flock_locks * sizeof(*flocks));
}
2010-08-02 15:34:23 -07:00
/*
* Given a pointer to a lock , convert it to a ceph filelock
*/
int lock_to_ceph_filelock ( struct file_lock * lock ,
struct ceph_filelock * cephlock )
{
int err = 0 ;
cephlock - > start = cpu_to_le64 ( lock - > fl_start ) ;
cephlock - > length = cpu_to_le64 ( lock - > fl_end - lock - > fl_start + 1 ) ;
cephlock - > client = cpu_to_le64 ( 0 ) ;
2014-03-09 23:16:40 +08:00
cephlock - > pid = cpu_to_le64 ( ( u64 ) lock - > fl_pid ) ;
2014-05-09 14:13:04 -04:00
cephlock - > owner = cpu_to_le64 ( secure_addr ( lock - > fl_owner ) ) ;
2010-08-02 15:34:23 -07:00
switch ( lock - > fl_type ) {
case F_RDLCK :
cephlock - > type = CEPH_LOCK_SHARED ;
break ;
case F_WRLCK :
cephlock - > type = CEPH_LOCK_EXCL ;
break ;
case F_UNLCK :
cephlock - > type = CEPH_LOCK_UNLOCK ;
break ;
default :
dout ( " Have unknown lock type %d " , lock - > fl_type ) ;
err = - EINVAL ;
}
return err ;
}