xen: features and fixes for 3.19-rc0

- Fully support non-coherent devices on ARM by introducing the
   mechanisms to request the hypervisor to perform the required cache
   maintainance operations.
 
 - A number of pciback bug fixes and cleanups.  Notably a deadlock fix
   if a PCI device was manually uunbound and a fix for incorrectly
   restoring state after a function reset.
 
 - In x86 PVHVM guests, use the APIC for interrupts if this has been
   virtualized by the hardware.  This reduces the number of interrupt-
   related VM exits on such hardware.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQEcBAABAgAGBQJUiYb+AAoJEFxbo/MsZsTRwmEH+gNaJz5r8gIJlq8Q51+nOIs4
 Gw6HdjUB5MOT47vDV4treEOx0Bk8hYTfgWUWvAC81JMJ1sMWOVrUGuG/0lmzaomW
 zXvSk+o0n4LafwEhHb8LIccZMbaH7f9o3PNdNchrTkPrIl8Gf2nmBXCkDsT4mRye
 5ZFpc4ntgBrznh3baPYDS8PCAmlyZ0uVEnz1ofYI6S80dC13siEiPG0c9TrNEKzO
 glhvgCRmR0C4ZNLblM36HWBEqrdLuGCoNJSH+7okygyP2TLD3aO4R+9aD5JWYNdf
 fO2WmivX/zK+UGVAElrLx+rb8R2dv3ddeaE5piZhIBUieopIWJd32L3LhQORdtc=
 =N6DP
 -----END PGP SIGNATURE-----

Merge tag 'stable/for-linus-3.19-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen features and fixes from David Vrabel:

 - Fully support non-coherent devices on ARM by introducing the
   mechanisms to request the hypervisor to perform the required cache
   maintainance operations.

 - A number of pciback bug fixes and cleanups.  Notably a deadlock fix
   if a PCI device was manually uunbound and a fix for incorrectly
   restoring state after a function reset.

 - In x86 PVHVM guests, use the APIC for interrupts if this has been
   virtualized by the hardware.  This reduces the number of interrupt-
   related VM exits on such hardware.

* tag 'stable/for-linus-3.19-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (26 commits)
  Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single"
  xen/pci: Use APIC directly when APIC virtualization hardware is available
  xen/pci: Defer initialization of MSI ops on HVM guests
  xen-pciback: drop SR-IOV VFs when PF driver unloads
  xen/pciback: Restore configuration space when detaching from a guest.
  PCI: Expose pci_load_saved_state for public consumption.
  xen/pciback: Remove tons of dereferences
  xen/pciback: Print out the domain owning the device.
  xen/pciback: Include the domain id if removing the device whilst still in use
  driver core: Provide an wrapper around the mutex to do lockdep warnings
  xen/pciback: Don't deadlock when unbinding.
  swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single
  swiotlb-xen: call xen_dma_sync_single_for_device when appropriate
  swiotlb-xen: remove BUG_ON in xen_bus_to_phys
  swiotlb-xen: pass dev_addr to xen_dma_unmap_page and xen_dma_sync_single_for_cpu
  xen/arm: introduce GNTTABOP_cache_flush
  xen/arm/arm64: introduce xen_arch_need_swiotlb
  xen/arm/arm64: merge xen/mm32.c into xen/mm.c
  xen/arm: use hypercall to flush caches in map_page
  xen: add a dma_addr_t dev_addr argument to xen_dma_map_page
  ...
This commit is contained in:
Linus Torvalds 2014-12-11 18:15:33 -08:00
commit 9d050966e2
26 changed files with 488 additions and 309 deletions

View File

@ -17,6 +17,7 @@ struct dev_archdata {
#ifdef CONFIG_ARM_DMA_USE_IOMMU
struct dma_iommu_mapping *mapping;
#endif
bool dma_coherent;
};
struct omap_device;

View File

@ -123,11 +123,18 @@ static inline unsigned long dma_max_pfn(struct device *dev)
static inline int set_arch_dma_coherent_ops(struct device *dev)
{
dev->archdata.dma_coherent = true;
set_dma_ops(dev, &arm_coherent_dma_ops);
return 0;
}
#define set_arch_dma_coherent_ops(dev) set_arch_dma_coherent_ops(dev)
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
return dev->archdata.dma_coherent;
}
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
unsigned int offset = paddr & ~PAGE_MASK;

View File

@ -5,6 +5,18 @@
#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>
void __xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs);
void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs);
void __xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
@ -20,20 +32,56 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
bool local = PFN_DOWN(dev_addr) == page_to_pfn(page);
/* Dom0 is mapped 1:1, so if pfn == mfn the page is local otherwise
* is a foreign page grant-mapped in dom0. If the page is local we
* can safely call the native dma_ops function, otherwise we call
* the xen specific function. */
if (local)
__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs);
struct dma_attrs *attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/* Dom0 is mapped 1:1, so calling pfn_valid on a foreign mfn will
* always return false. If the page is local we can safely call the
* native dma_ops function, otherwise we call the xen specific
* function. */
if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->unmap_page)
__generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
} else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->sync_single_for_cpu)
__generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir);
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->sync_single_for_device)
__generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */

View File

@ -107,4 +107,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
#define xen_unmap(cookie) iounmap((cookie))
bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn);
#endif /* _ASM_ARM_XEN_PAGE_H */

View File

@ -1 +1 @@
obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o mm32.o
obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o

View File

@ -261,11 +261,6 @@ static int __init xen_guest_init(void)
xen_setup_features();
if (!xen_feature(XENFEAT_grant_map_identity)) {
pr_warn("Please upgrade your Xen.\n"
"If your platform has any non-coherent DMA devices, they won't work properly.\n");
}
if (xen_feature(XENFEAT_dom0))
xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
else

View File

@ -1,6 +1,10 @@
#include <linux/cpu.h>
#include <linux/dma-mapping.h>
#include <linux/bootmem.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/of_address.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
@ -8,6 +12,7 @@
#include <linux/swiotlb.h>
#include <xen/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/swiotlb-xen.h>
@ -16,6 +21,114 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
enum dma_cache_op {
DMA_UNMAP,
DMA_MAP,
};
static bool hypercall_cflush = false;
/* functions called by SWIOTLB */
static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
size_t size, enum dma_data_direction dir, enum dma_cache_op op)
{
struct gnttab_cache_flush cflush;
unsigned long pfn;
size_t left = size;
pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
offset %= PAGE_SIZE;
do {
size_t len = left;
/* buffers in highmem or foreign pages cannot cross page
* boundaries */
if (len + offset > PAGE_SIZE)
len = PAGE_SIZE - offset;
cflush.op = 0;
cflush.a.dev_bus_addr = pfn << PAGE_SHIFT;
cflush.offset = offset;
cflush.length = len;
if (op == DMA_UNMAP && dir != DMA_TO_DEVICE)
cflush.op = GNTTAB_CACHE_INVAL;
if (op == DMA_MAP) {
if (dir == DMA_FROM_DEVICE)
cflush.op = GNTTAB_CACHE_INVAL;
else
cflush.op = GNTTAB_CACHE_CLEAN;
}
if (cflush.op)
HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
offset = 0;
pfn++;
left -= len;
} while (left);
}
static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_UNMAP);
}
static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, DMA_MAP);
}
void __xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (is_device_dma_coherent(hwdev))
return;
if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
return;
__xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
}
void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
if (is_device_dma_coherent(hwdev))
return;
if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void __xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void __xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (is_device_dma_coherent(hwdev))
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
}
bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn)
{
return (!hypercall_cflush && (pfn != mfn) && !is_device_dma_coherent(dev));
}
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
@ -56,10 +169,18 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
int __init xen_mm_init(void)
{
struct gnttab_cache_flush cflush;
if (!xen_initial_domain())
return 0;
xen_swiotlb_init(1, false);
xen_dma_ops = &xen_swiotlb_dma_ops;
cflush.op = 0;
cflush.a.dev_bus_addr = 0;
cflush.offset = 0;
cflush.length = 0;
if (HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1) != -ENOSYS)
hypercall_cflush = true;
return 0;
}
arch_initcall(xen_mm_init);

View File

@ -1,202 +0,0 @@
#include <linux/cpu.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <xen/features.h>
static DEFINE_PER_CPU(unsigned long, xen_mm32_scratch_virt);
static DEFINE_PER_CPU(pte_t *, xen_mm32_scratch_ptep);
static int alloc_xen_mm32_scratch_page(int cpu)
{
struct page *page;
unsigned long virt;
pmd_t *pmdp;
pte_t *ptep;
if (per_cpu(xen_mm32_scratch_ptep, cpu) != NULL)
return 0;
page = alloc_page(GFP_KERNEL);
if (page == NULL) {
pr_warn("Failed to allocate xen_mm32_scratch_page for cpu %d\n", cpu);
return -ENOMEM;
}
virt = (unsigned long)__va(page_to_phys(page));
pmdp = pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
ptep = pte_offset_kernel(pmdp, virt);
per_cpu(xen_mm32_scratch_virt, cpu) = virt;
per_cpu(xen_mm32_scratch_ptep, cpu) = ptep;
return 0;
}
static int xen_mm32_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
if (alloc_xen_mm32_scratch_page(cpu))
return NOTIFY_BAD;
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block xen_mm32_cpu_notifier = {
.notifier_call = xen_mm32_cpu_notify,
};
static void* xen_mm32_remap_page(dma_addr_t handle)
{
unsigned long virt = get_cpu_var(xen_mm32_scratch_virt);
pte_t *ptep = __get_cpu_var(xen_mm32_scratch_ptep);
*ptep = pfn_pte(handle >> PAGE_SHIFT, PAGE_KERNEL);
local_flush_tlb_kernel_page(virt);
return (void*)virt;
}
static void xen_mm32_unmap(void *vaddr)
{
put_cpu_var(xen_mm32_scratch_virt);
}
/* functions called by SWIOTLB */
static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
size_t size, enum dma_data_direction dir,
void (*op)(const void *, size_t, int))
{
unsigned long pfn;
size_t left = size;
pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
offset %= PAGE_SIZE;
do {
size_t len = left;
void *vaddr;
if (!pfn_valid(pfn))
{
/* Cannot map the page, we don't know its physical address.
* Return and hope for the best */
if (!xen_feature(XENFEAT_grant_map_identity))
return;
vaddr = xen_mm32_remap_page(handle) + offset;
op(vaddr, len, dir);
xen_mm32_unmap(vaddr - offset);
} else {
struct page *page = pfn_to_page(pfn);
if (PageHighMem(page)) {
if (len + offset > PAGE_SIZE)
len = PAGE_SIZE - offset;
if (cache_is_vipt_nonaliasing()) {
vaddr = kmap_atomic(page);
op(vaddr + offset, len, dir);
kunmap_atomic(vaddr);
} else {
vaddr = kmap_high_get(page);
if (vaddr) {
op(vaddr + offset, len, dir);
kunmap_high(page);
}
}
} else {
vaddr = page_address(page) + offset;
op(vaddr, len, dir);
}
}
offset = 0;
pfn++;
left -= len;
} while (left);
}
static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
/* Cannot use __dma_page_dev_to_cpu because we don't have a
* struct page for handle */
if (dir != DMA_TO_DEVICE)
outer_inv_range(handle, handle + size);
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
}
static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir)
{
dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
if (dir == DMA_FROM_DEVICE) {
outer_inv_range(handle, handle + size);
} else {
outer_clean_range(handle, handle + size);
}
}
void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
if (!__generic_dma_ops(hwdev)->unmap_page)
return;
if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (!__generic_dma_ops(hwdev)->sync_single_for_cpu)
return;
__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
}
void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
if (!__generic_dma_ops(hwdev)->sync_single_for_device)
return;
__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
}
int __init xen_mm32_init(void)
{
int cpu;
if (!xen_initial_domain())
return 0;
register_cpu_notifier(&xen_mm32_cpu_notifier);
get_online_cpus();
for_each_online_cpu(cpu) {
if (alloc_xen_mm32_scratch_page(cpu)) {
put_online_cpus();
unregister_cpu_notifier(&xen_mm32_cpu_notifier);
return -ENOMEM;
}
}
put_online_cpus();
return 0;
}
arch_initcall(xen_mm32_init);

View File

@ -21,6 +21,7 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu; /* private IOMMU data */
#endif
bool dma_coherent;
};
struct pdev_archdata {

View File

@ -54,11 +54,18 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
static inline int set_arch_dma_coherent_ops(struct device *dev)
{
dev->archdata.dma_coherent = true;
set_dma_ops(dev, &coherent_swiotlb_dma_ops);
return 0;
}
#define set_arch_dma_coherent_ops set_arch_dma_coherent_ops
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
return dev->archdata.dma_coherent;
}
#include <asm-generic/dma-mapping-common.h>
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)

View File

@ -1,43 +1 @@
#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
#define _ASM_ARM64_XEN_PAGE_COHERENT_H
#include <asm/page.h>
#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
__generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
}
#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
#include <../../arm/include/asm/xen/page-coherent.h>

View File

@ -0,0 +1,91 @@
/******************************************************************************
* arch-x86/cpuid.h
*
* CPUID interface to Xen.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2007 Citrix Systems, Inc.
*
* Authors:
* Keir Fraser <keir@xen.org>
*/
#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
/*
* For compatibility with other hypervisor interfaces, the Xen cpuid leaves
* can be found at the first otherwise unused 0x100 aligned boundary starting
* from 0x40000000.
*
* e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid
* leaves will start at 0x40000100
*/
#define XEN_CPUID_FIRST_LEAF 0x40000000
#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
/*
* Leaf 1 (0x40000x00)
* EAX: Largest Xen-information leaf. All leaves up to an including @EAX
* are supported by the Xen host.
* EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
* of a Xen host.
*/
#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
/*
* Leaf 2 (0x40000x01)
* EAX[31:16]: Xen major version.
* EAX[15: 0]: Xen minor version.
* EBX-EDX: Reserved (currently all zeroes).
*/
/*
* Leaf 3 (0x40000x02)
* EAX: Number of hypercall transfer pages. This register is always guaranteed
* to specify one hypercall page.
* EBX: Base address of Xen-specific MSRs.
* ECX: Features 1. Unused bits are set to zero.
* EDX: Features 2. Unused bits are set to zero.
*/
/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
/*
* Leaf 5 (0x40000x04)
* HVM-specific features
*/
/* EAX Features */
/* Virtualized APIC registers */
#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0)
/* Virtualized x2APIC accesses */
#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1)
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
#define XEN_CPUID_MAX_NUM_LEAVES 4
#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

View File

@ -22,8 +22,8 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs) { }
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs) { }
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,

View File

@ -236,4 +236,11 @@ void make_lowmem_page_readwrite(void *vaddr);
#define xen_remap(cookie, size) ioremap((cookie), (size));
#define xen_unmap(cookie) iounmap((cookie))
static inline bool xen_arch_need_swiotlb(struct device *dev,
unsigned long pfn,
unsigned long mfn)
{
return false;
}
#endif /* _ASM_X86_XEN_PAGE_H */

View File

@ -23,6 +23,8 @@
#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
#include <asm/xen/cpuid.h>
#include <asm/apic.h>
#include <asm/i8259.h>
static int xen_pcifront_enable_irq(struct pci_dev *dev)
@ -423,6 +425,28 @@ int __init pci_xen_init(void)
return 0;
}
#ifdef CONFIG_PCI_MSI
void __init xen_msi_init(void)
{
if (!disable_apic) {
/*
* If hardware supports (x2)APIC virtualization (as indicated
* by hypervisor's leaf 4) then we don't need to use pirqs/
* event channels for MSI handling and instead use regular
* APIC processing
*/
uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic))
return;
}
x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
}
#endif
int __init pci_xen_hvm_init(void)
{
if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
@ -437,8 +461,11 @@ int __init pci_xen_hvm_init(void)
#endif
#ifdef CONFIG_PCI_MSI
x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
/*
* We need to wait until after x2apic is initialized
* before we can set MSI IRQ ops.
*/
x86_platform.apic_post_init = xen_msi_init;
#endif
return 0;
}

View File

@ -1138,8 +1138,8 @@ EXPORT_SYMBOL_GPL(pci_store_saved_state);
* @dev: PCI device that we're dealing with
* @state: Saved state returned from pci_store_saved_state()
*/
static int pci_load_saved_state(struct pci_dev *dev,
struct pci_saved_state *state)
int pci_load_saved_state(struct pci_dev *dev,
struct pci_saved_state *state)
{
struct pci_cap_saved_data *cap;
@ -1167,6 +1167,7 @@ static int pci_load_saved_state(struct pci_dev *dev,
dev->state_saved = true;
return 0;
}
EXPORT_SYMBOL_GPL(pci_load_saved_state);
/**
* pci_load_and_free_saved_state - Reload the save state pointed to by state,

View File

@ -96,8 +96,6 @@ static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT;
phys_addr_t paddr = dma;
BUG_ON(paddr != dma); /* truncation has occurred, should never happen */
paddr |= baddr & ~PAGE_MASK;
return paddr;
@ -399,11 +397,13 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* buffering it.
*/
if (dma_capable(dev, dev_addr, size) &&
!range_straddles_page_boundary(phys, size) && !swiotlb_force) {
!range_straddles_page_boundary(phys, size) &&
!xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) &&
!swiotlb_force) {
/* we are not interested in the dma_addr returned by
* xen_dma_map_page, only in the potential cache flushes executed
* by the function. */
xen_dma_map_page(dev, page, offset, size, dir, attrs);
xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
return dev_addr;
}
@ -417,7 +417,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
return DMA_ERROR_CODE;
xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT),
map & ~PAGE_MASK, size, dir, attrs);
dev_addr, map & ~PAGE_MASK, size, dir, attrs);
dev_addr = xen_phys_to_bus(map);
/*
@ -447,7 +447,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
xen_dma_unmap_page(hwdev, paddr, size, dir, attrs);
xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs);
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr)) {
@ -495,14 +495,14 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (target == SYNC_FOR_CPU)
xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir);
xen_dma_sync_single_for_cpu(hwdev, dev_addr, size, dir);
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr))
swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
if (target == SYNC_FOR_DEVICE)
xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir);
xen_dma_sync_single_for_device(hwdev, dev_addr, size, dir);
if (dir != DMA_FROM_DEVICE)
return;
@ -557,6 +557,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
dma_addr_t dev_addr = xen_phys_to_bus(paddr);
if (swiotlb_force ||
xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) ||
!dma_capable(hwdev, dev_addr, sg->length) ||
range_straddles_page_boundary(paddr, sg->length)) {
phys_addr_t map = swiotlb_tbl_map_single(hwdev,
@ -574,6 +575,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
return 0;
}
xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT),
dev_addr,
map & ~PAGE_MASK,
sg->length,
dir,
@ -584,6 +586,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
* xen_dma_map_page, only in the potential cache flushes executed
* by the function. */
xen_dma_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT),
dev_addr,
paddr & ~PAGE_MASK,
sg->length,
dir,

View File

@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
}
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
struct pci_dev *dev, bool lock)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
@ -87,8 +87,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
mutex_unlock(&dev_data->lock);
if (found_dev)
if (found_dev) {
if (lock)
device_lock(&found_dev->dev);
pcistub_put_pci_dev(found_dev);
if (lock)
device_unlock(&found_dev->dev);
}
}
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
@ -156,8 +161,11 @@ static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
struct pci_dev_entry *dev_entry, *t;
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
struct pci_dev *dev = dev_entry->dev;
list_del(&dev_entry->list);
pcistub_put_pci_dev(dev_entry->dev);
device_lock(&dev->dev);
pcistub_put_pci_dev(dev);
device_unlock(&dev->dev);
kfree(dev_entry);
}

View File

@ -105,7 +105,7 @@ static void pcistub_device_release(struct kref *kref)
*/
__pci_reset_function_locked(dev);
if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
dev_dbg(&dev->dev, "Could not reload PCI state\n");
dev_info(&dev->dev, "Could not reload PCI state\n");
else
pci_restore_state(dev);
@ -250,11 +250,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
* - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
*
* As such we have to be careful.
*
* To make this easier, the caller has to hold the device lock.
*/
void pcistub_put_pci_dev(struct pci_dev *dev)
{
struct pcistub_device *psdev, *found_psdev = NULL;
unsigned long flags;
struct xen_pcibk_dev_data *dev_data;
int ret;
spin_lock_irqsave(&pcistub_devices_lock, flags);
@ -276,13 +280,20 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
/* Cleanup our device
* (so it's ready for the next domain)
*/
device_lock_assert(&dev->dev);
__pci_reset_function_locked(dev);
/* This is OK - we are running from workqueue context
* and want to inhibit the user from fiddling with 'reset'
*/
pci_reset_function(dev);
pci_restore_state(dev);
dev_data = pci_get_drvdata(dev);
ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
if (!ret) {
/*
* The usual sequence is pci_save_state & pci_restore_state
* but the guest might have messed the configuration space up.
* Use the initial version (when device was bound to us).
*/
pci_restore_state(dev);
} else
dev_info(&dev->dev, "Could not reload PCI state\n");
/* This disables the device. */
xen_pcibk_reset_device(dev);
@ -554,12 +565,14 @@ static void pcistub_remove(struct pci_dev *dev)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
if (found_psdev) {
dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
found_psdev->pdev);
dev_dbg(&dev->dev, "found device to remove %s\n",
found_psdev->pdev ? "- in-use" : "");
if (found_psdev->pdev) {
pr_warn("****** removing device %s while still in-use! ******\n",
pci_name(found_psdev->dev));
int domid = xen_find_device_domain_owner(dev);
pr_warn("****** removing device %s while still in-use by domain %d! ******\n",
pci_name(found_psdev->dev), domid);
pr_warn("****** driver domain may still access this device's i/o resources!\n");
pr_warn("****** shutdown driver domain before binding device\n");
pr_warn("****** to other drivers or domains\n");
@ -567,7 +580,8 @@ static void pcistub_remove(struct pci_dev *dev)
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(found_psdev->pdev,
found_psdev->dev);
found_psdev->dev,
false /* caller holds the lock. */);
}
spin_lock_irqsave(&pcistub_devices_lock, flags);
@ -629,10 +643,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
{
pci_ers_result_t res = result;
struct xen_pcie_aer_op *aer_op;
struct xen_pcibk_device *pdev = psdev->pdev;
struct xen_pci_sharedinfo *sh_info = pdev->sh_info;
int ret;
/*with PV AER drivers*/
aer_op = &(psdev->pdev->sh_info->aer_op);
aer_op = &(sh_info->aer_op);
aer_op->cmd = aer_cmd ;
/*useful for error_detected callback*/
aer_op->err = state;
@ -653,36 +669,36 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
* this flag to judge whether we need to check pci-front give aer
* service ack signal
*/
set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
set_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
/*It is possible that a pcifront conf_read_write ops request invokes
* the callback which cause the spurious execution of wake_up.
* Yet it is harmless and better than a spinlock here
*/
set_bit(_XEN_PCIB_active,
(unsigned long *)&psdev->pdev->sh_info->flags);
(unsigned long *)&sh_info->flags);
wmb();
notify_remote_via_irq(psdev->pdev->evtchn_irq);
notify_remote_via_irq(pdev->evtchn_irq);
ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *)
&psdev->pdev->sh_info->flags)), 300*HZ);
&sh_info->flags)), 300*HZ);
if (!ret) {
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&psdev->pdev->sh_info->flags)) {
(unsigned long *)&sh_info->flags)) {
dev_err(&psdev->dev->dev,
"pcifront aer process not responding!\n");
clear_bit(_XEN_PCIB_active,
(unsigned long *)&psdev->pdev->sh_info->flags);
(unsigned long *)&sh_info->flags);
aer_op->err = PCI_ERS_RESULT_NONE;
return res;
}
}
clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
if (test_bit(_XEN_PCIF_active,
(unsigned long *)&psdev->pdev->sh_info->flags)) {
(unsigned long *)&sh_info->flags)) {
dev_dbg(&psdev->dev->dev,
"schedule pci_conf service in " DRV_NAME "\n");
xen_pcibk_test_and_schedule_op(psdev->pdev);
@ -1502,6 +1518,53 @@ parse_error:
fs_initcall(pcistub_init);
#endif
#ifdef CONFIG_PCI_IOV
static struct pcistub_device *find_vfs(const struct pci_dev *pdev)
{
struct pcistub_device *psdev = NULL;
unsigned long flags;
bool found = false;
spin_lock_irqsave(&pcistub_devices_lock, flags);
list_for_each_entry(psdev, &pcistub_devices, dev_list) {
if (!psdev->pdev && psdev->dev != pdev
&& pci_physfn(psdev->dev) == pdev) {
found = true;
break;
}
}
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
if (found)
return psdev;
return NULL;
}
static int pci_stub_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
const struct pci_dev *pdev = to_pci_dev(dev);
if (action != BUS_NOTIFY_UNBIND_DRIVER)
return NOTIFY_DONE;
if (!pdev->is_physfn)
return NOTIFY_DONE;
for (;;) {
struct pcistub_device *psdev = find_vfs(pdev);
if (!psdev)
break;
device_release_driver(&psdev->dev->dev);
}
return NOTIFY_DONE;
}
static struct notifier_block pci_stub_nb = {
.notifier_call = pci_stub_notifier,
};
#endif
static int __init xen_pcibk_init(void)
{
int err;
@ -1523,12 +1586,19 @@ static int __init xen_pcibk_init(void)
err = xen_pcibk_xenbus_register();
if (err)
pcistub_exit();
#ifdef CONFIG_PCI_IOV
else
bus_register_notifier(&pci_bus_type, &pci_stub_nb);
#endif
return err;
}
static void __exit xen_pcibk_cleanup(void)
{
#ifdef CONFIG_PCI_IOV
bus_unregister_notifier(&pci_bus_type, &pci_stub_nb);
#endif
xen_pcibk_xenbus_unregister();
pcistub_exit();
}

View File

@ -99,7 +99,8 @@ struct xen_pcibk_backend {
unsigned int *domain, unsigned int *bus,
unsigned int *devfn);
int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
bool lock);
int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb);
struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
@ -122,10 +123,10 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
}
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
struct pci_dev *dev, bool lock)
{
if (xen_pcibk_backend && xen_pcibk_backend->release)
return xen_pcibk_backend->release(pdev, dev);
return xen_pcibk_backend->release(pdev, dev, lock);
}
static inline struct pci_dev *

View File

@ -145,7 +145,7 @@ out:
}
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
struct pci_dev *dev, bool lock)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
@ -169,8 +169,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
out:
mutex_unlock(&vpci_dev->lock);
if (found_dev)
if (found_dev) {
if (lock)
device_lock(&found_dev->dev);
pcistub_put_pci_dev(found_dev);
if (lock)
device_unlock(&found_dev->dev);
}
}
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
@ -208,8 +213,11 @@ static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
struct pci_dev_entry *e, *tmp;
list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
list) {
struct pci_dev *dev = e->dev;
list_del(&e->list);
pcistub_put_pci_dev(e->dev);
device_lock(&dev->dev);
pcistub_put_pci_dev(dev);
device_unlock(&dev->dev);
kfree(e);
}
}

View File

@ -247,7 +247,7 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
if (err)
goto out;
dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
dev_info(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
if (xen_register_device_domain_owner(dev,
pdev->xdev->otherend_id) != 0) {
dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
@ -291,7 +291,7 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
xen_pcibk_release_pci_dev(pdev, dev);
xen_pcibk_release_pci_dev(pdev, dev, true /* use the lock. */);
out:
return err;

View File

@ -911,6 +911,11 @@ static inline void device_unlock(struct device *dev)
mutex_unlock(&dev->mutex);
}
static inline void device_lock_assert(struct device *dev)
{
lockdep_assert_held(&dev->mutex);
}
void driver_init(void);
/*

View File

@ -1004,6 +1004,8 @@ void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size);
int pci_save_state(struct pci_dev *dev);
void pci_restore_state(struct pci_dev *dev);
struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev);
int pci_load_saved_state(struct pci_dev *dev,
struct pci_saved_state *state);
int pci_load_and_free_saved_state(struct pci_dev *dev,
struct pci_saved_state **state);
struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap);

View File

@ -53,9 +53,6 @@
/* operation as Dom0 is supported */
#define XENFEAT_dom0 11
/* Xen also maps grant references at pfn = mfn */
#define XENFEAT_grant_map_identity 12
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */

View File

@ -478,6 +478,25 @@ struct gnttab_get_version {
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
/*
* Issue one or more cache maintenance operations on a portion of a
* page granted to the calling domain by a foreign domain.
*/
#define GNTTABOP_cache_flush 12
struct gnttab_cache_flush {
union {
uint64_t dev_bus_addr;
grant_ref_t ref;
} a;
uint16_t offset; /* offset from start of grant */
uint16_t length; /* size within the grant */
#define GNTTAB_CACHE_CLEAN (1<<0)
#define GNTTAB_CACHE_INVAL (1<<1)
#define GNTTAB_CACHE_SOURCE_GREF (1<<31)
uint32_t op;
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
/*
* Bitfield values for update_pin_status.flags.
*/