qemu-patch-raspberry4/hw/vfio/pci.h
Alex Williamson c958c51d2e vfio/quirks: ioeventfd quirk acceleration
The NVIDIA BAR0 quirks virtualize the PCI config space mirrors found
in device MMIO space.  Normally PCI config space is considered a slow
path and further optimization is unnecessary, however NVIDIA uses a
register here to enable the MSI interrupt to re-trigger.  Exiting to
QEMU for this MSI-ACK handling can therefore rate limit our interrupt
handling.  Fortunately the MSI-ACK write is easily detected since the
quirk MemoryRegion otherwise has very few accesses, so simply looking
for consecutive writes with the same data is sufficient, in this case
10 consecutive writes with the same data and size is arbitrarily
chosen.  We configure the KVM ioeventfd with data match, so there's
no risk of triggering for the wrong data or size, but we do risk that
pathological driver behavior might consume all of QEMU's file
descriptors, so we cap ourselves to 10 ioeventfds for this purpose.

In support of the above, generic ioeventfd infrastructure is added
for vfio quirks.  This automatically initializes an ioeventfd list
per quirk, disables and frees ioeventfds on exit, and allows
ioeventfds marked as dynamic to be dropped on device reset.  The
rationale for this latter feature is that useful ioeventfds may
depend on specific driver behavior and since we necessarily place a
cap on our use of ioeventfds, a machine reset is a reasonable point
at which to assume a new driver and re-profile.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-06-05 08:23:17 -06:00

200 lines
6.1 KiB
C

/*
* vfio based device assignment support - PCI devices
*
* Copyright Red Hat, Inc. 2012-2015
*
* Authors:
* Alex Williamson <alex.williamson@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#ifndef HW_VFIO_VFIO_PCI_H
#define HW_VFIO_VFIO_PCI_H
#include "qemu-common.h"
#include "exec/memory.h"
#include "hw/pci/pci.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
/*
 * All-bits-set ID value.
 * NOTE(review): presumably a "match any" wildcard for vendor/device ID
 * comparisons -- confirm against the code that uses it.
 */
#define PCI_ANY_ID (~0)
/*
 * Forward declaration: VFIOQuirk.reset and VFIOMSIVector.vdev refer to the
 * device type before struct VFIOPCIDevice is defined further down.
 */
struct VFIOPCIDevice;
/*
 * One ioeventfd attached to a quirk.  Instances are chained on the owning
 * VFIOQuirk's ioeventfds list through @next.
 *
 * NOTE(review): notes marked "presumably" are inferred from member names and
 * types only; confirm against the code that creates and tears these down.
 */
typedef struct VFIOIOEventFD {
    QLIST_ENTRY(VFIOIOEventFD) next;  /* linkage in VFIOQuirk.ioeventfds */
    MemoryRegion *mr;                 /* presumably the region being watched */
    hwaddr addr;                      /* presumably the offset within @mr */
    unsigned size;                    /* access size associated with the match */
    uint64_t data;                    /* data value associated with the match */
    EventNotifier e;                  /* notifier backing this ioeventfd */
    VFIORegion *region;               /* presumably the VFIO region targeted */
    hwaddr region_addr;               /* presumably the offset within @region */
    bool dynamic;                     /* added at runtime, removed on device reset */
} VFIOIOEventFD;
/*
 * A device quirk.  Quirks are kept on per-BAR and per-VGA-region lists
 * (see the "quirks" list heads in VFIOBAR and VFIOVGARegion) and each one
 * carries its own list of ioeventfds.
 */
typedef struct VFIOQuirk {
    QLIST_ENTRY(VFIOQuirk) next;            /* linkage in a "quirks" list */
    void *data;                             /* opaque, quirk-specific data */
    QLIST_HEAD(, VFIOIOEventFD) ioeventfds; /* ioeventfds owned by this quirk */
    int nr_mem;                             /* presumably the entry count of @mem -- confirm */
    MemoryRegion *mem;                      /* MemoryRegion(s) used by the quirk */
    /* Optional per-quirk reset hook; receives the device and this quirk. */
    void (*reset)(struct VFIOPCIDevice *vdev, struct VFIOQuirk *quirk);
} VFIOQuirk;
/*
 * State kept for one PCI BAR of an assigned device, including the list of
 * quirks installed on it.
 *
 * NOTE(review): notes marked "presumably" are inferred from member names;
 * confirm against the BAR setup code.
 */
typedef struct VFIOBAR {
    VFIORegion region;                /* VFIO region state for this BAR */
    MemoryRegion *mr;                 /* MemoryRegion associated with the BAR */
    size_t size;                      /* presumably the BAR size in bytes */
    uint8_t type;                     /* presumably the BAR type bits */
    bool ioport;                      /* presumably set for I/O port BARs */
    bool mem64;                       /* presumably set for 64-bit memory BARs */
    QLIST_HEAD(, VFIOQuirk) quirks;   /* quirks attached to this BAR */
} VFIOBAR;
/*
 * One VGA region of an assigned device together with the quirks installed
 * on it.  VFIOVGA holds an array of these.
 */
typedef struct VFIOVGARegion {
    MemoryRegion mem;                 /* MemoryRegion for this VGA region */
    off_t offset;                     /* presumably the region's offset -- confirm base */
    int nr;                           /* presumably this region's index -- confirm */
    QLIST_HEAD(, VFIOQuirk) quirks;   /* quirks attached to this region */
} VFIOVGARegion;
/*
 * VGA state for an assigned device: a file descriptor/offset pair plus one
 * VFIOVGARegion per QEMU PCI VGA region.
 */
typedef struct VFIOVGA {
    off_t fd_offset;                  /* presumably the VGA offset within @fd */
    int fd;                           /* file descriptor used for VGA access */
    VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS];
} VFIOVGA;
/* Legacy INTx interrupt state for an assigned device. */
typedef struct VFIOINTx {
    bool pending;             /* an interrupt is pending */
    bool kvm_accel;           /* set while the QEMU bypass through KVM is enabled */
    uint8_t pin;              /* pin to pull for qemu_set_irq */
    EventNotifier interrupt;  /* eventfd triggered on interrupt */
    EventNotifier unmask;     /* eventfd used for unmask on QEMU bypass */
    PCIINTxRoute route;       /* routing info for the QEMU bypass */
    uint32_t mmap_timeout;    /* delay before mmaps are re-enabled after an interrupt */
    QEMUTimer *mmap_timer;    /* re-enables mmaps after periods without interrupts */
} VFIOINTx;
/*
 * Per-vector MSI/MSI-X state.
 *
 * Each vector has two interrupt paths.  @interrupt is used only for
 * interrupts injected via QEMU; this is typically the non-accelerated path,
 * but it may also be used when QEMU should handle masking and pending bits.
 * @kvm_interrupt is the KVM path, which bypasses QEMU and is therefore
 * higher performance, but requires masking at the device.  @virq tracks the
 * MSI route through KVM, so @kvm_interrupt is only available while @virq
 * holds a valid (>= 0) value.
 */
typedef struct VFIOMSIVector {
    EventNotifier interrupt;        /* QEMU-injected path */
    EventNotifier kvm_interrupt;    /* KVM bypass path; valid only if virq >= 0 */
    struct VFIOPCIDevice *vdev;     /* back pointer to device */
    int virq;                       /* MSI route through KVM; valid when >= 0 */
    bool use;                       /* presumably set while the vector is in use */
} VFIOMSIVector;
/*
 * Interrupt type identifiers.
 * NOTE(review): presumably the values held in VFIOPCIDevice.interrupt
 * ("current interrupt type") -- confirm against the users.
 */
enum {
    VFIO_INT_NONE = 0,   /* no interrupt type selected */
    VFIO_INT_INTx = 1,   /* INTx */
    VFIO_INT_MSI  = 2,   /* MSI */
    VFIO_INT_MSIX = 3,   /* MSI-X */
};
/*
 * Cache of MSI-X setup.
 *
 * NOTE(review): the per-field notes are inferred from member names and the
 * usual MSI-X capability layout (table and PBA each located by a BAR index
 * plus an offset); confirm against the code that fills this structure in.
 */
typedef struct VFIOMSIXInfo {
    uint8_t table_bar;        /* presumably the BAR holding the vector table */
    uint8_t pba_bar;          /* presumably the BAR holding the PBA */
    uint16_t entries;         /* presumably the number of table entries */
    uint32_t table_offset;    /* presumably the table offset within table_bar */
    uint32_t pba_offset;      /* presumably the PBA offset within pba_bar */
    unsigned long *pending;   /* presumably a pending-bit bitmap */
} VFIOMSIXInfo;
/*
 * Per-device state for a vfio-pci assigned device.
 *
 * NOTE(review): trailing comments carried over from the original are
 * authoritative; section headings and notes marked "presumably" are inferred
 * from member names and types and should be confirmed against the users.
 */
typedef struct VFIOPCIDevice {
    /* Embedded base objects. */
    PCIDevice pdev;
    VFIODevice vbasedev;

    /* INTx state. */
    VFIOINTx intx;

    /* Config space. */
    unsigned int config_size;
    uint8_t *emulated_config_bits;  /* QEMU emulated bits, little-endian */
    off_t config_offset;    /* Offset of config space region within device fd */

    /* Option ROM. */
    unsigned int rom_size;
    off_t rom_offset;       /* Offset of ROM region within device fd */
    void *rom;

    /* MSI / MSI-X. */
    int msi_cap_size;
    VFIOMSIVector *msi_vectors;
    VFIOMSIXInfo *msix;
    int nr_vectors;         /* Number of MSI/MSIX vectors currently in use */
    int interrupt;          /* Current interrupt type */

    /* BARs, VGA and IGD OpRegion. */
    VFIOBAR bars[PCI_NUM_REGIONS - 1];  /* No ROM */
    VFIOVGA *vga;           /* 0xa0000, 0x3b0, 0x3c0 */
    void *igd_opregion;

    /* Host address and notifiers. */
    PCIHostDeviceAddress host;
    EventNotifier err_notifier;
    EventNotifier req_notifier;

    /* Device-specific reset hook; takes the device and returns an int status. */
    int (*resetfn)(struct VFIOPCIDevice *);

    /* PCI identification values. */
    uint32_t vendor_id;
    uint32_t device_id;
    uint32_t sub_vendor_id;
    uint32_t sub_device_id;

    /* Feature bitmask; bit positions and masks are defined just below. */
    uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
#define VFIO_FEATURE_ENABLE_REQ_BIT 1
#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
#define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
#define VFIO_FEATURE_ENABLE_IGD_OPREGION \
                                (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)

    /* Presumably user-configurable options -- confirm against the properties. */
    OnOffAuto display;
    int32_t bootindex;
    uint32_t igd_gms;
    OffAutoPCIBAR msix_relo;

    /* Capability and quirk-related values. */
    uint8_t pm_cap;
    uint8_t nv_gpudirect_clique;

    /* Boolean state and option flags. */
    bool pci_aer;
    bool req_enabled;
    bool has_flr;
    bool has_pm_reset;
    bool rom_read_failed;
    bool no_kvm_intx;
    bool no_kvm_msi;
    bool no_kvm_msix;
    bool no_geforce_quirks;
    bool no_kvm_ioeventfd;

    /* Display state. */
    VFIODisplay *dpy;
} VFIOPCIDevice;
/*
 * Declarations shared between the vfio-pci source files.  Only prototypes
 * are visible in this header, so the grouping notes below are derived from
 * names and signatures alone -- confirm details against the definitions.
 */
/* PCI config space accessors: @addr is the offset, @len the access length. */
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
/* VGA region accessors; the type behind @opaque is not visible here. */
uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
/* Option ROM blacklist query; presumably true means "blacklisted" -- confirm. */
bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
/* VGA quirk lifecycle: setup / exit / finalize. */
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
/* BAR quirk lifecycle: setup / exit / finalize; @nr presumably the BAR index. */
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
/* Presumably selects a device-specific VFIOPCIDevice.resetfn -- confirm. */
void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
/* Reports failure through @errp; return convention not visible here. */
int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
/* Quirk reset entry point for the whole device. */
void vfio_quirk_reset(VFIOPCIDevice *vdev);
/* Property type descriptor, defined elsewhere. */
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
/* Reports failure through @errp; return convention not visible here. */
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
/* IGD OpRegion initialisation from a vfio region info; errors via @errp. */
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
struct vfio_region_info *info,
Error **errp);
/* Display support: reset / probe / finalize. */
void vfio_display_reset(VFIOPCIDevice *vdev);
int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
void vfio_display_finalize(VFIOPCIDevice *vdev);
#endif /* HW_VFIO_VFIO_PCI_H */