2019-05-29 07:12:40 -07:00
// SPDX-License-Identifier: GPL-2.0-only
2012-03-15 21:58:34 +00:00
/*
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
2016-02-15 12:55:09 +11:00
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
2012-03-15 21:58:34 +00:00
*/
# include <linux/types.h>
# include <linux/string.h>
# include <linux/kvm.h>
# include <linux/kvm_host.h>
# include <linux/highmem.h>
# include <linux/gfp.h>
# include <linux/slab.h>
2017-02-08 18:51:30 +01:00
# include <linux/sched/signal.h>
2012-03-15 21:58:34 +00:00
# include <linux/hugetlb.h>
# include <linux/list.h>
# include <linux/anon_inodes.h>
2017-03-22 15:21:56 +11:00
# include <linux/iommu.h>
# include <linux/file.h>
2019-07-16 16:30:54 -07:00
# include <linux/mm.h>
2012-03-15 21:58:34 +00:00
# include <asm/kvm_ppc.h>
# include <asm/kvm_book3s.h>
2016-03-01 12:59:20 +05:30
# include <asm/book3s/64/mmu-hash.h>
2012-03-15 21:58:34 +00:00
# include <asm/hvcall.h>
# include <asm/synch.h>
# include <asm/ppc-opcode.h>
# include <asm/kvm_host.h>
# include <asm/udbg.h>
2016-02-15 12:55:07 +11:00
# include <asm/iommu.h>
2016-02-15 12:55:09 +11:00
# include <asm/tce.h>
2017-03-22 15:21:56 +11:00
# include <asm/mmu_context.h>
2012-03-15 21:58:34 +00:00
2016-03-01 17:54:38 +11:00
static unsigned long kvmppc_tce_pages ( unsigned long iommu_pages )
2012-03-15 21:58:34 +00:00
{
2016-03-01 17:54:38 +11:00
return ALIGN ( iommu_pages * sizeof ( u64 ) , PAGE_SIZE ) / PAGE_SIZE ;
2012-03-15 21:58:34 +00:00
}
2016-02-15 12:55:06 +11:00
/*
 * Total page count charged for a table: the @tce_pages data pages plus the
 * pages taken by the descriptor (struct kvmppc_spapr_tce_table followed by
 * one struct page pointer per data page), rounded up to a whole page.
 */
static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
	unsigned long desc_bytes, desc_pages;

	desc_bytes = sizeof(struct kvmppc_spapr_tce_table) +
			(tce_pages * sizeof(struct page *));
	desc_pages = ALIGN(desc_bytes, PAGE_SIZE) / PAGE_SIZE;

	return tce_pages + desc_pages;
}
2017-03-22 15:21:56 +11:00
static void kvm_spapr_tce_iommu_table_free ( struct rcu_head * head )
{
struct kvmppc_spapr_tce_iommu_table * stit = container_of ( head ,
struct kvmppc_spapr_tce_iommu_table , rcu ) ;
iommu_tce_table_put ( stit - > tbl ) ;
kfree ( stit ) ;
}
static void kvm_spapr_tce_liobn_put ( struct kref * kref )
{
struct kvmppc_spapr_tce_iommu_table * stit = container_of ( kref ,
struct kvmppc_spapr_tce_iommu_table , kref ) ;
list_del_rcu ( & stit - > next ) ;
call_rcu ( & stit - > rcu , kvm_spapr_tce_iommu_table_free ) ;
}
extern void kvm_spapr_tce_release_iommu_group ( struct kvm * kvm ,
struct iommu_group * grp )
{
int i ;
struct kvmppc_spapr_tce_table * stt ;
struct kvmppc_spapr_tce_iommu_table * stit , * tmp ;
struct iommu_table_group * table_group = NULL ;
list_for_each_entry_rcu ( stt , & kvm - > arch . spapr_tce_tables , list ) {
table_group = iommu_group_get_iommudata ( grp ) ;
if ( WARN_ON ( ! table_group ) )
continue ;
list_for_each_entry_safe ( stit , tmp , & stt - > iommu_tables , next ) {
for ( i = 0 ; i < IOMMU_TABLE_GROUP_MAX_TABLES ; + + i ) {
if ( table_group - > tables [ i ] ! = stit - > tbl )
continue ;
kref_put ( & stit - > kref , kvm_spapr_tce_liobn_put ) ;
}
}
}
}
extern long kvm_spapr_tce_attach_iommu_group ( struct kvm * kvm , int tablefd ,
struct iommu_group * grp )
{
struct kvmppc_spapr_tce_table * stt = NULL ;
bool found = false ;
struct iommu_table * tbl = NULL ;
struct iommu_table_group * table_group ;
long i ;
struct kvmppc_spapr_tce_iommu_table * stit ;
struct fd f ;
f = fdget ( tablefd ) ;
if ( ! f . file )
return - EBADF ;
list_for_each_entry_rcu ( stt , & kvm - > arch . spapr_tce_tables , list ) {
if ( stt = = f . file - > private_data ) {
found = true ;
break ;
}
}
fdput ( f ) ;
if ( ! found )
return - EINVAL ;
table_group = iommu_group_get_iommudata ( grp ) ;
if ( WARN_ON ( ! table_group ) )
return - EFAULT ;
for ( i = 0 ; i < IOMMU_TABLE_GROUP_MAX_TABLES ; + + i ) {
struct iommu_table * tbltmp = table_group - > tables [ i ] ;
if ( ! tbltmp )
continue ;
2018-05-14 20:00:28 +10:00
/* Make sure hardware table parameters are compatible */
if ( ( tbltmp - > it_page_shift < = stt - > page_shift ) & &
( tbltmp - > it_offset < < tbltmp - > it_page_shift = =
stt - > offset < < stt - > page_shift ) & &
2018-06-20 18:42:58 +10:00
( tbltmp - > it_size < < tbltmp - > it_page_shift > =
2018-05-14 20:00:28 +10:00
stt - > size < < stt - > page_shift ) ) {
2017-03-22 15:21:56 +11:00
/*
* Reference the table to avoid races with
* add / remove DMA windows .
*/
tbl = iommu_tce_table_get ( tbltmp ) ;
break ;
}
}
if ( ! tbl )
return - EINVAL ;
list_for_each_entry_rcu ( stit , & stt - > iommu_tables , next ) {
if ( tbl ! = stit - > tbl )
continue ;
if ( ! kref_get_unless_zero ( & stit - > kref ) ) {
/* stit is being destroyed */
iommu_tce_table_put ( tbl ) ;
return - ENOTTY ;
}
/*
* The table is already known to this KVM , we just increased
* its KVM reference counter and can return .
*/
return 0 ;
}
stit = kzalloc ( sizeof ( * stit ) , GFP_KERNEL ) ;
if ( ! stit ) {
iommu_tce_table_put ( tbl ) ;
return - ENOMEM ;
}
stit - > tbl = tbl ;
kref_init ( & stit - > kref ) ;
list_add_rcu ( & stit - > next , & stt - > iommu_tables ) ;
return 0 ;
}
2016-02-15 12:55:05 +11:00
static void release_spapr_tce_table ( struct rcu_head * head )
2012-03-15 21:58:34 +00:00
{
2016-02-15 12:55:05 +11:00
struct kvmppc_spapr_tce_table * stt = container_of ( head ,
struct kvmppc_spapr_tce_table , rcu ) ;
2016-03-01 17:54:38 +11:00
unsigned long i , npages = kvmppc_tce_pages ( stt - > size ) ;
2012-03-15 21:58:34 +00:00
2016-02-15 12:55:06 +11:00
for ( i = 0 ; i < npages ; i + + )
2019-03-29 16:43:26 +11:00
if ( stt - > pages [ i ] )
__free_page ( stt - > pages [ i ] ) ;
2012-03-15 21:58:34 +00:00
2016-02-15 12:55:05 +11:00
kfree ( stt ) ;
2012-03-15 21:58:34 +00:00
}
2019-03-29 16:43:26 +11:00
static struct page * kvm_spapr_get_tce_page ( struct kvmppc_spapr_tce_table * stt ,
unsigned long sttpage )
{
struct page * page = stt - > pages [ sttpage ] ;
if ( page )
return page ;
mutex_lock ( & stt - > alloc_lock ) ;
page = stt - > pages [ sttpage ] ;
if ( ! page ) {
page = alloc_page ( GFP_KERNEL | __GFP_ZERO ) ;
WARN_ON_ONCE ( ! page ) ;
if ( page )
stt - > pages [ sttpage ] = page ;
}
mutex_unlock ( & stt - > alloc_lock ) ;
return page ;
}
2018-05-10 23:57:19 +05:30
static vm_fault_t kvm_spapr_tce_fault ( struct vm_fault * vmf )
2012-03-15 21:58:34 +00:00
{
2017-02-24 14:56:41 -08:00
struct kvmppc_spapr_tce_table * stt = vmf - > vma - > vm_file - > private_data ;
2012-03-15 21:58:34 +00:00
struct page * page ;
2016-03-01 17:54:38 +11:00
if ( vmf - > pgoff > = kvmppc_tce_pages ( stt - > size ) )
2012-03-15 21:58:34 +00:00
return VM_FAULT_SIGBUS ;
2019-03-29 16:43:26 +11:00
page = kvm_spapr_get_tce_page ( stt , vmf - > pgoff ) ;
if ( ! page )
return VM_FAULT_OOM ;
2012-03-15 21:58:34 +00:00
get_page ( page ) ;
vmf - > page = page ;
return 0 ;
}
/* VMA operations installed by kvm_spapr_tce_mmap(); only .fault is set. */
static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault	= kvm_spapr_tce_fault,
};
static int kvm_spapr_tce_mmap ( struct file * file , struct vm_area_struct * vma )
{
vma - > vm_ops = & kvm_spapr_tce_vm_ops ;
return 0 ;
}
static int kvm_spapr_tce_release ( struct inode * inode , struct file * filp )
{
struct kvmppc_spapr_tce_table * stt = filp - > private_data ;
2017-03-22 15:21:56 +11:00
struct kvmppc_spapr_tce_iommu_table * stit , * tmp ;
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
Al Viro pointed out that while one thread of a process is executing
in kvm_vm_ioctl_create_spapr_tce(), another thread could guess the
file descriptor returned by anon_inode_getfd() and close() it before
the first thread has added it to the kvm->arch.spapr_tce_tables list.
That highlights a more general problem: there is no mutual exclusion
between writers to the spapr_tce_tables list, leading to the
possibility of the list becoming corrupted, which could cause a
host kernel crash.
To fix the mutual exclusion problem, we add a mutex_lock/unlock
pair around the list_del_rce in kvm_spapr_tce_release(). Also,
this moves the call to anon_inode_getfd() inside the region
protected by the kvm->lock mutex, after we have done the check for
a duplicate LIOBN. This means that if another thread does guess the
file descriptor and closes it, its call to kvm_spapr_tce_release()
will not do any harm because it will have to wait until the first
thread has released kvm->lock. With this, there are no failure
points in kvm_vm_ioctl_create_spapr_tce() after the call to
anon_inode_getfd().
The other things that the second thread could do with the guessed
file descriptor are to mmap it or to pass it as a parameter to a
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd. An mmap
call won't cause any harm because kvm_spapr_tce_mmap() and
kvm_spapr_tce_fault() don't access the spapr_tce_tables list or
the kvmppc_spapr_tce_table.list field, and the fields that they do use
have been properly initialized by the time of the anon_inode_getfd()
call.
The KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl calls
kvm_spapr_tce_attach_iommu_group(), which scans the spapr_tce_tables
list looking for the kvmppc_spapr_tce_table struct corresponding to
the fd given as the parameter. Either it will find the new entry
or it won't; if it doesn't, it just returns an error, and if it
does, it will function normally. So, in each case there is no
harmful effect.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-08-28 14:31:24 +10:00
struct kvm * kvm = stt - > kvm ;
2012-03-15 21:58:34 +00:00
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
Al Viro pointed out that while one thread of a process is executing
in kvm_vm_ioctl_create_spapr_tce(), another thread could guess the
file descriptor returned by anon_inode_getfd() and close() it before
the first thread has added it to the kvm->arch.spapr_tce_tables list.
That highlights a more general problem: there is no mutual exclusion
between writers to the spapr_tce_tables list, leading to the
possibility of the list becoming corrupted, which could cause a
host kernel crash.
To fix the mutual exclusion problem, we add a mutex_lock/unlock
pair around the list_del_rce in kvm_spapr_tce_release(). Also,
this moves the call to anon_inode_getfd() inside the region
protected by the kvm->lock mutex, after we have done the check for
a duplicate LIOBN. This means that if another thread does guess the
file descriptor and closes it, its call to kvm_spapr_tce_release()
will not do any harm because it will have to wait until the first
thread has released kvm->lock. With this, there are no failure
points in kvm_vm_ioctl_create_spapr_tce() after the call to
anon_inode_getfd().
The other things that the second thread could do with the guessed
file descriptor are to mmap it or to pass it as a parameter to a
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd. An mmap
call won't cause any harm because kvm_spapr_tce_mmap() and
kvm_spapr_tce_fault() don't access the spapr_tce_tables list or
the kvmppc_spapr_tce_table.list field, and the fields that they do use
have been properly initialized by the time of the anon_inode_getfd()
call.
The KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl calls
kvm_spapr_tce_attach_iommu_group(), which scans the spapr_tce_tables
list looking for the kvmppc_spapr_tce_table struct corresponding to
the fd given as the parameter. Either it will find the new entry
or it won't; if it doesn't, it just returns an error, and if it
does, it will function normally. So, in each case there is no
harmful effect.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-08-28 14:31:24 +10:00
mutex_lock ( & kvm - > lock ) ;
2016-02-15 12:55:05 +11:00
list_del_rcu ( & stt - > list ) ;
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
Al Viro pointed out that while one thread of a process is executing
in kvm_vm_ioctl_create_spapr_tce(), another thread could guess the
file descriptor returned by anon_inode_getfd() and close() it before
the first thread has added it to the kvm->arch.spapr_tce_tables list.
That highlights a more general problem: there is no mutual exclusion
between writers to the spapr_tce_tables list, leading to the
possibility of the list becoming corrupted, which could cause a
host kernel crash.
To fix the mutual exclusion problem, we add a mutex_lock/unlock
pair around the list_del_rce in kvm_spapr_tce_release(). Also,
this moves the call to anon_inode_getfd() inside the region
protected by the kvm->lock mutex, after we have done the check for
a duplicate LIOBN. This means that if another thread does guess the
file descriptor and closes it, its call to kvm_spapr_tce_release()
will not do any harm because it will have to wait until the first
thread has released kvm->lock. With this, there are no failure
points in kvm_vm_ioctl_create_spapr_tce() after the call to
anon_inode_getfd().
The other things that the second thread could do with the guessed
file descriptor are to mmap it or to pass it as a parameter to a
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd. An mmap
call won't cause any harm because kvm_spapr_tce_mmap() and
kvm_spapr_tce_fault() don't access the spapr_tce_tables list or
the kvmppc_spapr_tce_table.list field, and the fields that they do use
have been properly initialized by the time of the anon_inode_getfd()
call.
The KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl calls
kvm_spapr_tce_attach_iommu_group(), which scans the spapr_tce_tables
list looking for the kvmppc_spapr_tce_table struct corresponding to
the fd given as the parameter. Either it will find the new entry
or it won't; if it doesn't, it just returns an error, and if it
does, it will function normally. So, in each case there is no
harmful effect.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-08-28 14:31:24 +10:00
mutex_unlock ( & kvm - > lock ) ;
2016-02-15 12:55:05 +11:00
2017-03-22 15:21:56 +11:00
list_for_each_entry_safe ( stit , tmp , & stt - > iommu_tables , next ) {
WARN_ON ( ! kref_read ( & stit - > kref ) ) ;
while ( 1 ) {
if ( kref_put ( & stit - > kref , kvm_spapr_tce_liobn_put ) )
break ;
}
}
2016-02-15 12:55:05 +11:00
kvm_put_kvm ( stt - > kvm ) ;
2019-07-16 16:30:54 -07:00
account_locked_vm ( current - > mm ,
2016-03-01 17:54:38 +11:00
kvmppc_stt_pages ( kvmppc_tce_pages ( stt - > size ) ) , false ) ;
2016-02-15 12:55:05 +11:00
call_rcu ( & stt - > rcu , release_spapr_tce_table ) ;
2012-03-15 21:58:34 +00:00
return 0 ;
}
2013-04-04 19:09:41 -04:00
static const struct file_operations kvm_spapr_tce_fops = {
2012-03-15 21:58:34 +00:00
. mmap = kvm_spapr_tce_mmap ,
. release = kvm_spapr_tce_release ,
} ;
long kvm_vm_ioctl_create_spapr_tce ( struct kvm * kvm ,
2016-03-01 17:54:40 +11:00
struct kvm_create_spapr_tce_64 * args )
2012-03-15 21:58:34 +00:00
{
struct kvmppc_spapr_tce_table * stt = NULL ;
2017-08-24 19:14:47 +10:00
struct kvmppc_spapr_tce_table * siter ;
2018-06-20 18:42:58 +10:00
unsigned long npages , size = args - > size ;
2012-03-15 21:58:34 +00:00
int ret = - ENOMEM ;
2018-05-14 20:00:29 +10:00
if ( ! args - > size | | args - > page_shift < 12 | | args - > page_shift > 34 | |
( args - > offset + args - > size > ( ULLONG_MAX > > args - > page_shift ) ) )
2016-03-01 17:54:40 +11:00
return - EINVAL ;
2016-03-01 17:54:38 +11:00
npages = kvmppc_tce_pages ( size ) ;
2019-07-16 16:30:54 -07:00
ret = account_locked_vm ( current - > mm , kvmppc_stt_pages ( npages ) , true ) ;
2017-08-24 19:14:47 +10:00
if ( ret )
return ret ;
2012-03-15 21:58:34 +00:00
2017-02-08 16:20:01 +00:00
ret = - ENOMEM ;
2012-03-15 21:58:34 +00:00
stt = kzalloc ( sizeof ( * stt ) + npages * sizeof ( struct page * ) ,
GFP_KERNEL ) ;
if ( ! stt )
2017-08-24 19:14:47 +10:00
goto fail_acct ;
2012-03-15 21:58:34 +00:00
stt - > liobn = args - > liobn ;
2016-03-01 17:54:40 +11:00
stt - > page_shift = args - > page_shift ;
stt - > offset = args - > offset ;
2016-03-01 17:54:38 +11:00
stt - > size = size ;
2012-03-15 21:58:34 +00:00
stt - > kvm = kvm ;
2019-03-29 16:43:26 +11:00
mutex_init ( & stt - > alloc_lock ) ;
2017-03-22 15:21:56 +11:00
INIT_LIST_HEAD_RCU ( & stt - > iommu_tables ) ;
2012-03-15 21:58:34 +00:00
mutex_lock ( & kvm - > lock ) ;
2017-08-24 19:14:47 +10:00
/* Check this LIOBN hasn't been previously allocated */
ret = 0 ;
list_for_each_entry ( siter , & kvm - > arch . spapr_tce_tables , list ) {
if ( siter - > liobn = = args - > liobn ) {
ret = - EBUSY ;
break ;
}
}
2019-02-21 14:44:14 +11:00
kvm_get_kvm ( kvm ) ;
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
Al Viro pointed out that while one thread of a process is executing
in kvm_vm_ioctl_create_spapr_tce(), another thread could guess the
file descriptor returned by anon_inode_getfd() and close() it before
the first thread has added it to the kvm->arch.spapr_tce_tables list.
That highlights a more general problem: there is no mutual exclusion
between writers to the spapr_tce_tables list, leading to the
possibility of the list becoming corrupted, which could cause a
host kernel crash.
To fix the mutual exclusion problem, we add a mutex_lock/unlock
pair around the list_del_rce in kvm_spapr_tce_release(). Also,
this moves the call to anon_inode_getfd() inside the region
protected by the kvm->lock mutex, after we have done the check for
a duplicate LIOBN. This means that if another thread does guess the
file descriptor and closes it, its call to kvm_spapr_tce_release()
will not do any harm because it will have to wait until the first
thread has released kvm->lock. With this, there are no failure
points in kvm_vm_ioctl_create_spapr_tce() after the call to
anon_inode_getfd().
The other things that the second thread could do with the guessed
file descriptor are to mmap it or to pass it as a parameter to a
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd. An mmap
call won't cause any harm because kvm_spapr_tce_mmap() and
kvm_spapr_tce_fault() don't access the spapr_tce_tables list or
the kvmppc_spapr_tce_table.list field, and the fields that they do use
have been properly initialized by the time of the anon_inode_getfd()
call.
The KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl calls
kvm_spapr_tce_attach_iommu_group(), which scans the spapr_tce_tables
list looking for the kvmppc_spapr_tce_table struct corresponding to
the fd given as the parameter. Either it will find the new entry
or it won't; if it doesn't, it just returns an error, and if it
does, it will function normally. So, in each case there is no
harmful effect.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-08-28 14:31:24 +10:00
if ( ! ret )
ret = anon_inode_getfd ( " kvm-spapr-tce " , & kvm_spapr_tce_fops ,
stt , O_RDWR | O_CLOEXEC ) ;
2019-02-21 14:44:14 +11:00
if ( ret > = 0 )
2017-08-24 19:14:47 +10:00
list_add_rcu ( & stt - > list , & kvm - > arch . spapr_tce_tables ) ;
2019-02-21 14:44:14 +11:00
else
kvm_put_kvm ( kvm ) ;
2012-03-15 21:58:34 +00:00
mutex_unlock ( & kvm - > lock ) ;
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
Al Viro pointed out that while one thread of a process is executing
in kvm_vm_ioctl_create_spapr_tce(), another thread could guess the
file descriptor returned by anon_inode_getfd() and close() it before
the first thread has added it to the kvm->arch.spapr_tce_tables list.
That highlights a more general problem: there is no mutual exclusion
between writers to the spapr_tce_tables list, leading to the
possibility of the list becoming corrupted, which could cause a
host kernel crash.
To fix the mutual exclusion problem, we add a mutex_lock/unlock
pair around the list_del_rce in kvm_spapr_tce_release(). Also,
this moves the call to anon_inode_getfd() inside the region
protected by the kvm->lock mutex, after we have done the check for
a duplicate LIOBN. This means that if another thread does guess the
file descriptor and closes it, its call to kvm_spapr_tce_release()
will not do any harm because it will have to wait until the first
thread has released kvm->lock. With this, there are no failure
points in kvm_vm_ioctl_create_spapr_tce() after the call to
anon_inode_getfd().
The other things that the second thread could do with the guessed
file descriptor are to mmap it or to pass it as a parameter to a
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd. An mmap
call won't cause any harm because kvm_spapr_tce_mmap() and
kvm_spapr_tce_fault() don't access the spapr_tce_tables list or
the kvmppc_spapr_tce_table.list field, and the fields that they do use
have been properly initialized by the time of the anon_inode_getfd()
call.
The KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl calls
kvm_spapr_tce_attach_iommu_group(), which scans the spapr_tce_tables
list looking for the kvmppc_spapr_tce_table struct corresponding to
the fd given as the parameter. Either it will find the new entry
or it won't; if it doesn't, it just returns an error, and if it
does, it will function normally. So, in each case there is no
harmful effect.
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-08-28 14:31:24 +10:00
if ( ret > = 0 )
return ret ;
2012-03-15 21:58:34 +00:00
2017-08-24 19:14:47 +10:00
kfree ( stt ) ;
fail_acct :
2019-07-16 16:30:54 -07:00
account_locked_vm ( current - > mm , kvmppc_stt_pages ( npages ) , false ) ;
2012-03-15 21:58:34 +00:00
return ret ;
}
2016-02-15 12:55:09 +11:00
2019-03-29 16:42:20 +11:00
static long kvmppc_tce_to_ua ( struct kvm * kvm , unsigned long tce ,
unsigned long * ua )
{
unsigned long gfn = tce > > PAGE_SHIFT ;
struct kvm_memory_slot * memslot ;
memslot = search_memslots ( kvm_memslots ( kvm ) , gfn ) ;
if ( ! memslot )
return - EINVAL ;
* ua = __gfn_to_hva_memslot ( memslot , gfn ) |
( tce & ~ ( PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE ) ) ;
return 0 ;
}
2018-09-10 18:29:10 +10:00
static long kvmppc_tce_validate ( struct kvmppc_spapr_tce_table * stt ,
unsigned long tce )
{
unsigned long gpa = tce & ~ ( TCE_PCI_READ | TCE_PCI_WRITE ) ;
enum dma_data_direction dir = iommu_tce_direction ( tce ) ;
struct kvmppc_spapr_tce_iommu_table * stit ;
unsigned long ua = 0 ;
/* Allow userspace to poison TCE table */
if ( dir = = DMA_NONE )
return H_SUCCESS ;
if ( iommu_tce_check_gpa ( stt - > page_shift , gpa ) )
return H_TOO_HARD ;
2019-03-29 16:42:20 +11:00
if ( kvmppc_tce_to_ua ( stt - > kvm , tce , & ua ) )
2018-09-10 18:29:10 +10:00
return H_TOO_HARD ;
list_for_each_entry_rcu ( stit , & stt - > iommu_tables , next ) {
unsigned long hpa = 0 ;
struct mm_iommu_table_group_mem_t * mem ;
long shift = stit - > tbl - > it_page_shift ;
mem = mm_iommu_lookup ( stt - > kvm - > mm , ua , 1ULL < < shift ) ;
if ( ! mem )
return H_TOO_HARD ;
if ( mm_iommu_ua_to_hpa ( mem , ua , shift , & hpa ) )
return H_TOO_HARD ;
}
return H_SUCCESS ;
}
2019-03-29 16:43:26 +11:00
/*
* Handles TCE requests for emulated devices .
* Puts guest TCE values to the table and expects user space to convert them .
* Cannot fail so kvmppc_tce_validate must be called before it .
*/
static void kvmppc_tce_put ( struct kvmppc_spapr_tce_table * stt ,
unsigned long idx , unsigned long tce )
{
struct page * page ;
u64 * tbl ;
unsigned long sttpage ;
idx - = stt - > offset ;
sttpage = idx / TCES_PER_PAGE ;
page = stt - > pages [ sttpage ] ;
if ( ! page ) {
/* We allow any TCE, not just with read|write permissions */
if ( ! tce )
return ;
page = kvm_spapr_get_tce_page ( stt , sttpage ) ;
if ( ! page )
return ;
}
tbl = page_to_virt ( page ) ;
tbl [ idx % TCES_PER_PAGE ] = tce ;
}
2018-12-19 19:52:15 +11:00
static void kvmppc_clear_tce ( struct mm_struct * mm , struct iommu_table * tbl ,
unsigned long entry )
2017-03-22 15:21:56 +11:00
{
unsigned long hpa = 0 ;
enum dma_data_direction dir = DMA_NONE ;
2018-12-19 19:52:15 +11:00
iommu_tce_xchg ( mm , tbl , entry , & hpa , & dir ) ;
2017-03-22 15:21:56 +11:00
}
static long kvmppc_tce_iommu_mapped_dec ( struct kvm * kvm ,
struct iommu_table * tbl , unsigned long entry )
{
struct mm_iommu_table_group_mem_t * mem = NULL ;
const unsigned long pgsize = 1ULL < < tbl - > it_page_shift ;
2018-10-15 21:08:41 +11:00
__be64 * pua = IOMMU_TABLE_USERSPACE_ENTRY_RO ( tbl , entry ) ;
2017-03-22 15:21:56 +11:00
if ( ! pua )
2018-10-15 21:08:41 +11:00
return H_SUCCESS ;
2017-03-22 15:21:56 +11:00
2018-07-04 16:13:46 +10:00
mem = mm_iommu_lookup ( kvm - > mm , be64_to_cpu ( * pua ) , pgsize ) ;
2017-03-22 15:21:56 +11:00
if ( ! mem )
return H_TOO_HARD ;
mm_iommu_mapped_dec ( mem ) ;
2018-07-04 16:13:46 +10:00
* pua = cpu_to_be64 ( 0 ) ;
2017-03-22 15:21:56 +11:00
return H_SUCCESS ;
}
2018-05-14 20:00:28 +10:00
static long kvmppc_tce_iommu_do_unmap ( struct kvm * kvm ,
2017-03-22 15:21:56 +11:00
struct iommu_table * tbl , unsigned long entry )
{
enum dma_data_direction dir = DMA_NONE ;
unsigned long hpa = 0 ;
long ret ;
2018-12-19 19:52:15 +11:00
if ( WARN_ON_ONCE ( iommu_tce_xchg ( kvm - > mm , tbl , entry , & hpa , & dir ) ) )
2018-09-10 18:29:09 +10:00
return H_TOO_HARD ;
2017-03-22 15:21:56 +11:00
if ( dir = = DMA_NONE )
return H_SUCCESS ;
ret = kvmppc_tce_iommu_mapped_dec ( kvm , tbl , entry ) ;
if ( ret ! = H_SUCCESS )
2018-12-19 19:52:15 +11:00
iommu_tce_xchg ( kvm - > mm , tbl , entry , & hpa , & dir ) ;
2017-03-22 15:21:56 +11:00
return ret ;
}
2018-05-14 20:00:28 +10:00
static long kvmppc_tce_iommu_unmap ( struct kvm * kvm ,
struct kvmppc_spapr_tce_table * stt , struct iommu_table * tbl ,
unsigned long entry )
{
unsigned long i , ret = H_SUCCESS ;
unsigned long subpages = 1ULL < < ( stt - > page_shift - tbl - > it_page_shift ) ;
unsigned long io_entry = entry * subpages ;
for ( i = 0 ; i < subpages ; + + i ) {
ret = kvmppc_tce_iommu_do_unmap ( kvm , tbl , io_entry + i ) ;
if ( ret ! = H_SUCCESS )
break ;
}
return ret ;
}
long kvmppc_tce_iommu_do_map ( struct kvm * kvm , struct iommu_table * tbl ,
2017-03-22 15:21:56 +11:00
unsigned long entry , unsigned long ua ,
enum dma_data_direction dir )
{
long ret ;
2018-07-04 16:13:46 +10:00
unsigned long hpa ;
__be64 * pua = IOMMU_TABLE_USERSPACE_ENTRY ( tbl , entry ) ;
2017-03-22 15:21:56 +11:00
struct mm_iommu_table_group_mem_t * mem ;
if ( ! pua )
/* it_userspace allocation might be delayed */
return H_TOO_HARD ;
mem = mm_iommu_lookup ( kvm - > mm , ua , 1ULL < < tbl - > it_page_shift ) ;
if ( ! mem )
/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
return H_TOO_HARD ;
2018-07-17 17:19:13 +10:00
if ( WARN_ON_ONCE ( mm_iommu_ua_to_hpa ( mem , ua , tbl - > it_page_shift , & hpa ) ) )
2018-09-10 18:29:09 +10:00
return H_TOO_HARD ;
2017-03-22 15:21:56 +11:00
if ( mm_iommu_mapped_inc ( mem ) )
2018-09-10 18:29:09 +10:00
return H_TOO_HARD ;
2017-03-22 15:21:56 +11:00
2018-12-19 19:52:15 +11:00
ret = iommu_tce_xchg ( kvm - > mm , tbl , entry , & hpa , & dir ) ;
2017-03-22 15:21:56 +11:00
if ( WARN_ON_ONCE ( ret ) ) {
mm_iommu_mapped_dec ( mem ) ;
2018-09-10 18:29:09 +10:00
return H_TOO_HARD ;
2017-03-22 15:21:56 +11:00
}
if ( dir ! = DMA_NONE )
kvmppc_tce_iommu_mapped_dec ( kvm , tbl , entry ) ;
2018-07-04 16:13:46 +10:00
* pua = cpu_to_be64 ( ua ) ;
2017-03-22 15:21:56 +11:00
return 0 ;
}
2018-05-14 20:00:28 +10:00
static long kvmppc_tce_iommu_map ( struct kvm * kvm ,
struct kvmppc_spapr_tce_table * stt , struct iommu_table * tbl ,
unsigned long entry , unsigned long ua ,
enum dma_data_direction dir )
{
unsigned long i , pgoff , ret = H_SUCCESS ;
unsigned long subpages = 1ULL < < ( stt - > page_shift - tbl - > it_page_shift ) ;
unsigned long io_entry = entry * subpages ;
for ( i = 0 , pgoff = 0 ; i < subpages ;
+ + i , pgoff + = IOMMU_PAGE_SIZE ( tbl ) ) {
ret = kvmppc_tce_iommu_do_map ( kvm , tbl ,
io_entry + i , ua + pgoff , dir ) ;
if ( ret ! = H_SUCCESS )
break ;
}
return ret ;
}
2016-03-18 13:50:42 +11:00
long kvmppc_h_put_tce ( struct kvm_vcpu * vcpu , unsigned long liobn ,
unsigned long ioba , unsigned long tce )
{
2017-03-22 15:21:53 +11:00
struct kvmppc_spapr_tce_table * stt ;
2017-03-22 15:21:56 +11:00
long ret , idx ;
struct kvmppc_spapr_tce_iommu_table * stit ;
unsigned long entry , ua = 0 ;
enum dma_data_direction dir ;
2016-03-18 13:50:42 +11:00
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
2017-03-22 15:21:53 +11:00
stt = kvmppc_find_table ( vcpu - > kvm , liobn ) ;
2016-03-18 13:50:42 +11:00
if ( ! stt )
return H_TOO_HARD ;
ret = kvmppc_ioba_validate ( stt , ioba , 1 ) ;
if ( ret ! = H_SUCCESS )
return ret ;
2019-03-29 16:41:13 +11:00
idx = srcu_read_lock ( & vcpu - > kvm - > srcu ) ;
2016-03-18 13:50:42 +11:00
ret = kvmppc_tce_validate ( stt , tce ) ;
if ( ret ! = H_SUCCESS )
2019-03-29 16:41:13 +11:00
goto unlock_exit ;
2016-03-18 13:50:42 +11:00
2017-03-22 15:21:56 +11:00
dir = iommu_tce_direction ( tce ) ;
2017-10-11 16:00:34 +11:00
2019-03-29 16:42:20 +11:00
if ( ( dir ! = DMA_NONE ) & & kvmppc_tce_to_ua ( vcpu - > kvm , tce , & ua ) ) {
2017-10-11 16:00:34 +11:00
ret = H_PARAMETER ;
goto unlock_exit ;
}
2017-03-22 15:21:56 +11:00
entry = ioba > > stt - > page_shift ;
list_for_each_entry_lockless ( stit , & stt - > iommu_tables , next ) {
2017-10-11 16:00:34 +11:00
if ( dir = = DMA_NONE )
2018-05-14 20:00:28 +10:00
ret = kvmppc_tce_iommu_unmap ( vcpu - > kvm , stt ,
2017-03-22 15:21:56 +11:00
stit - > tbl , entry ) ;
2017-10-11 16:00:34 +11:00
else
2018-05-14 20:00:28 +10:00
ret = kvmppc_tce_iommu_map ( vcpu - > kvm , stt , stit - > tbl ,
2017-03-22 15:21:56 +11:00
entry , ua , dir ) ;
2018-09-10 18:29:11 +10:00
if ( ret ! = H_SUCCESS ) {
2018-12-19 19:52:15 +11:00
kvmppc_clear_tce ( vcpu - > kvm - > mm , stit - > tbl , entry ) ;
2017-10-11 16:00:34 +11:00
goto unlock_exit ;
2018-09-10 18:29:11 +10:00
}
2017-03-22 15:21:56 +11:00
}
kvmppc_tce_put ( stt , entry , tce ) ;
2016-03-18 13:50:42 +11:00
2017-10-11 16:00:34 +11:00
unlock_exit :
srcu_read_unlock ( & vcpu - > kvm - > srcu , idx ) ;
return ret ;
2016-03-18 13:50:42 +11:00
}
EXPORT_SYMBOL_GPL ( kvmppc_h_put_tce ) ;
2016-02-15 12:55:09 +11:00
long kvmppc_h_put_tce_indirect ( struct kvm_vcpu * vcpu ,
unsigned long liobn , unsigned long ioba ,
unsigned long tce_list , unsigned long npages )
{
struct kvmppc_spapr_tce_table * stt ;
long i , ret = H_SUCCESS , idx ;
unsigned long entry , ua = 0 ;
2016-07-12 10:54:48 +10:00
u64 __user * tces ;
u64 tce ;
2017-03-22 15:21:56 +11:00
struct kvmppc_spapr_tce_iommu_table * stit ;
2016-02-15 12:55:09 +11:00
2017-03-22 15:21:53 +11:00
stt = kvmppc_find_table ( vcpu - > kvm , liobn ) ;
2016-02-15 12:55:09 +11:00
if ( ! stt )
return H_TOO_HARD ;
2016-03-01 17:54:38 +11:00
entry = ioba > > stt - > page_shift ;
2016-02-15 12:55:09 +11:00
/*
* SPAPR spec says that the maximum size of the list is 512 TCEs
* so the whole table fits in 4 K page
*/
if ( npages > 512 )
return H_PARAMETER ;
if ( tce_list & ( SZ_4K - 1 ) )
return H_PARAMETER ;
ret = kvmppc_ioba_validate ( stt , ioba , npages ) ;
if ( ret ! = H_SUCCESS )
return ret ;
idx = srcu_read_lock ( & vcpu - > kvm - > srcu ) ;
2019-03-29 16:42:20 +11:00
if ( kvmppc_tce_to_ua ( vcpu - > kvm , tce_list , & ua ) ) {
2016-02-15 12:55:09 +11:00
ret = H_TOO_HARD ;
goto unlock_exit ;
}
tces = ( u64 __user * ) ua ;
for ( i = 0 ; i < npages ; + + i ) {
if ( get_user ( tce , tces + i ) ) {
ret = H_TOO_HARD ;
goto unlock_exit ;
}
tce = be64_to_cpu ( tce ) ;
ret = kvmppc_tce_validate ( stt , tce ) ;
if ( ret ! = H_SUCCESS )
goto unlock_exit ;
2018-09-10 18:29:08 +10:00
}
for ( i = 0 ; i < npages ; + + i ) {
/*
* This looks unsafe , because we validate , then regrab
* the TCE from userspace which could have been changed by
* another thread .
*
* But it actually is safe , because the relevant checks will be
* re - executed in the following code . If userspace tries to
* change this dodgily it will result in a messier failure mode
* but won ' t threaten the host .
*/
if ( get_user ( tce , tces + i ) ) {
ret = H_TOO_HARD ;
goto unlock_exit ;
}
tce = be64_to_cpu ( tce ) ;
2016-02-15 12:55:09 +11:00
2019-03-29 16:42:20 +11:00
if ( kvmppc_tce_to_ua ( vcpu - > kvm , tce , & ua ) )
2017-03-22 15:21:56 +11:00
return H_PARAMETER ;
list_for_each_entry_lockless ( stit , & stt - > iommu_tables , next ) {
2018-05-14 20:00:28 +10:00
ret = kvmppc_tce_iommu_map ( vcpu - > kvm , stt ,
2017-03-22 15:21:56 +11:00
stit - > tbl , entry + i , ua ,
iommu_tce_direction ( tce ) ) ;
2018-09-10 18:29:11 +10:00
if ( ret ! = H_SUCCESS ) {
2018-12-19 19:52:15 +11:00
kvmppc_clear_tce ( vcpu - > kvm - > mm , stit - > tbl ,
entry ) ;
2017-03-22 15:21:56 +11:00
goto unlock_exit ;
2018-09-10 18:29:11 +10:00
}
2017-03-22 15:21:56 +11:00
}
2016-02-15 12:55:09 +11:00
kvmppc_tce_put ( stt , entry + i , tce ) ;
}
unlock_exit :
srcu_read_unlock ( & vcpu - > kvm - > srcu , idx ) ;
return ret ;
}
EXPORT_SYMBOL_GPL ( kvmppc_h_put_tce_indirect ) ;
2016-03-18 13:50:42 +11:00
long kvmppc_h_stuff_tce ( struct kvm_vcpu * vcpu ,
unsigned long liobn , unsigned long ioba ,
unsigned long tce_value , unsigned long npages )
{
struct kvmppc_spapr_tce_table * stt ;
long i , ret ;
2017-03-22 15:21:56 +11:00
struct kvmppc_spapr_tce_iommu_table * stit ;
2016-03-18 13:50:42 +11:00
2017-03-22 15:21:53 +11:00
stt = kvmppc_find_table ( vcpu - > kvm , liobn ) ;
2016-03-18 13:50:42 +11:00
if ( ! stt )
return H_TOO_HARD ;
ret = kvmppc_ioba_validate ( stt , ioba , npages ) ;
if ( ret ! = H_SUCCESS )
return ret ;
/* Check permission bits only to allow userspace poison TCE for debug */
if ( tce_value & ( TCE_PCI_WRITE | TCE_PCI_READ ) )
return H_PARAMETER ;
2017-03-22 15:21:56 +11:00
list_for_each_entry_lockless ( stit , & stt - > iommu_tables , next ) {
2018-05-14 20:00:27 +10:00
unsigned long entry = ioba > > stt - > page_shift ;
2017-03-22 15:21:56 +11:00
for ( i = 0 ; i < npages ; + + i ) {
2018-05-14 20:00:28 +10:00
ret = kvmppc_tce_iommu_unmap ( vcpu - > kvm , stt ,
2017-03-22 15:21:56 +11:00
stit - > tbl , entry + i ) ;
if ( ret = = H_SUCCESS )
continue ;
if ( ret = = H_TOO_HARD )
return ret ;
WARN_ON_ONCE ( 1 ) ;
2018-12-19 19:52:15 +11:00
kvmppc_clear_tce ( vcpu - > kvm - > mm , stit - > tbl , entry ) ;
2017-03-22 15:21:56 +11:00
}
}
2016-03-18 13:50:42 +11:00
for ( i = 0 ; i < npages ; + + i , ioba + = ( 1ULL < < stt - > page_shift ) )
kvmppc_tce_put ( stt , ioba > > stt - > page_shift , tce_value ) ;
return H_SUCCESS ;
}
EXPORT_SYMBOL_GPL ( kvmppc_h_stuff_tce ) ;