Merge remote-tracking branch 'mst/tags/for_anthony' into staging

pci,virtio

This further optimizes MSIX handling in virtio-pci.
Also included are a PCI cleanup by Paolo and a PCI device
assignment fix by Alex.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

* mst/tags/for_anthony:
  pci-assign: Enable MSIX on device to match guest
  pci: use constants for devices under the 1B36 device ID, document them
  ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt
  virtio-9p: use symbolic constant, add to pci-ids.txt
  reorganize pci-ids.txt
  docs: move pci-ids.txt to docs/specs/
  vhost: backend masking support
  vhost: set started flag while start is in progress
  virtio-net: set/clear vhost_started in reverse order
  virtio: backend virtqueue notifier masking
  virtio-pci: cache msix messages
  kvm: add stub for update msi route
  msix: add api to access msix message
  virtio: don't waste irqfds on control vqs
Committed by Anthony Liguori, 2013-01-14 10:23:50 -06:00
commit 8e9a8681dd
19 changed files with 438 additions and 101 deletions

docs/specs/pci-ids.txt (new file, 50 lines)

@@ -0,0 +1,50 @@
+PCI IDs for qemu
+================
+
+Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
+virtual devices.  The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
+
+Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned
+for your devices.
+
+1af4 vendor ID
+--------------
+
+The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
+Note that this allocation is separate from the virtio device IDs, which are
+maintained as part of the virtio specification.
+
+1af4:1000  network device
+1af4:1001  block device
+1af4:1002  balloon device
+1af4:1003  console device
+1af4:1004  SCSI host bus adapter device
+1af4:1005  entropy generator device
+1af4:1009  9p filesystem device
+
+1af4:10f0  Available for experimental usage without registration.  Must get
+   to      official ID when the code leaves the test lab (i.e. when seeking
+1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
+
+1af4:1100  Used as PCI Subsystem ID for existing hardware devices emulated
+           by qemu.
+
+1af4:1110  ivshmem device (shared memory, docs/specs/ivshmem_device_spec.txt)
+
+All other device IDs are reserved.
+
+1b36 vendor ID
+--------------
+
+The 0000 -> 00ff device ID range is used as follows for QEMU-specific
+PCI devices (other than virtio):
+
+1b36:0001  PCI-PCI bridge
+1b36:0002  PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
+1b36:0003  PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
+1b36:0004  PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
+
+All these devices are documented in docs/specs.
+
+The 0100 device ID is used for the QXL video card device.
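
The hunks below put this table to work, replacing inline hex IDs with named
constants. As an illustration only, a new device would follow the same
pattern; the device and the PCI_DEVICE_ID_REDHAT_EXAMPLE constant here are
hypothetical, and such an ID would first have to be assigned and documented
above (this sketch compiles only inside the QEMU tree):

/* Sketch only, not from this commit: a QEMU PCI device class adopting a
 * documented 1b36 ID. PCI_DEVICE_ID_REDHAT_EXAMPLE is a placeholder name. */
#define PCI_DEVICE_ID_REDHAT_EXAMPLE 0x00ff   /* hypothetical */

static void example_class_init(ObjectClass *klass, void *data)
{
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->vendor_id = PCI_VENDOR_ID_REDHAT;          /* 0x1b36, see above */
    k->device_id = PCI_DEVICE_ID_REDHAT_EXAMPLE;  /* assigned, documented */
    k->class_id  = PCI_CLASS_OTHERS;
}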

@@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
     k->init = virtio_9p_init_pci;
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-    k->device_id = 0x1009;
+    k->device_id = PCI_DEVICE_ID_VIRTIO_9P;
     k->revision = VIRTIO_PCI_ABI_VERSION;
     k->class_id = 0x2;
     dc->props = virtio_9p_properties;

@@ -29,6 +29,9 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 
+#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
+#define PCI_DEVICE_ID_IVSHMEM 0x1110
+
 #define IVSHMEM_IOEVENTFD 0
 #define IVSHMEM_MSI 1
@@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
     k->init = pci_ivshmem_init;
     k->exit = pci_ivshmem_uninit;
-    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-    k->device_id = 0x1110;
+    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
+    k->device_id = PCI_DEVICE_ID_IVSHMEM;
     k->class_id = PCI_CLASS_MEMORY_RAM;
     dc->reset = ivshmem_reset;
     dc->props = ivshmem_properties;

@@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
     return (entry->ctrl & cpu_to_le32(0x1)) != 0;
 }
 
+/*
+ * When MSI-X is first enabled the vector table typically has all the
+ * vectors masked, so we can't use that as the obvious test to figure out
+ * how many vectors to initially enable.  Instead we look at the data field
+ * because this is what worked for pci-assign for a long time.  This makes
+ * sure the physical MSI-X state tracks the guest's view, which is important
+ * for some VF/PF and PF/fw communication channels.
+ */
+static bool assigned_dev_msix_skipped(MSIXTableEntry *entry)
+{
+    return !entry->data;
+}
+
 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 {
     AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     /* Get the usable entry number for allocating */
     for (i = 0; i < adev->msix_max; i++, entry++) {
-        if (assigned_dev_msix_masked(entry)) {
+        if (assigned_dev_msix_skipped(entry)) {
             continue;
         }
         entries_nr++;
@@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     for (i = 0; i < adev->msix_max; i++, entry++) {
         adev->msi_virq[i] = -1;
-        if (assigned_dev_msix_masked(entry)) {
+        if (assigned_dev_msix_skipped(entry)) {
             continue;
         }
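
The distinction the new predicate draws can be tried outside the QEMU tree.
A self-contained sketch (a plain struct stands in for QEMU's MSIXTableEntry,
whose fields are little-endian in the real code):

/* Sketch only, not part of the commit. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint32_t addr_lo, addr_hi, data, ctrl;
} MSIXTableEntry;

/* Vector is masked: bit 0 of the vector control word is set. */
static bool msix_masked(const MSIXTableEntry *e)  { return e->ctrl & 1; }
/* Vector was never programmed by the guest: message data still zero. */
static bool msix_skipped(const MSIXTableEntry *e) { return !e->data; }

int main(void)
{
    /* Freshly enabled MSI-X: masked per the spec, data not yet written.
     * The mask bit alone cannot tell "unused" from "temporarily masked";
     * the data field can, which is why pci-assign switches to it. */
    MSIXTableEntry fresh = { .ctrl = 1, .data = 0 };
    MSIXTableEntry programmed_but_masked = { .ctrl = 1, .data = 0x4041 };

    printf("fresh:      masked=%d skipped=%d\n",
           msix_masked(&fresh), msix_skipped(&fresh));
    printf("programmed: masked=%d skipped=%d\n",
           msix_masked(&programmed_but_masked),
           msix_skipped(&programmed_but_masked));
    return 0;
}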

@@ -27,7 +27,7 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
     MSIMessage msg;

@@ -5,6 +5,7 @@
 #include "hw/pci/pci.h"
 
 void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
+MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
 int msix_init(PCIDevice *dev, unsigned short nentries,
               MemoryRegion *table_bar, uint8_t table_bar_nr,
               unsigned table_offset, MemoryRegion *pba_bar,

@@ -77,6 +77,14 @@
 #define PCI_DEVICE_ID_VIRTIO_CONSOLE     0x1003
 #define PCI_DEVICE_ID_VIRTIO_SCSI        0x1004
 #define PCI_DEVICE_ID_VIRTIO_RNG         0x1005
+#define PCI_DEVICE_ID_VIRTIO_9P          0x1009
+
+#define PCI_VENDOR_ID_REDHAT             0x1b36
+#define PCI_DEVICE_ID_REDHAT_BRIDGE      0x0001
+#define PCI_DEVICE_ID_REDHAT_SERIAL      0x0002
+#define PCI_DEVICE_ID_REDHAT_SERIAL2     0x0003
+#define PCI_DEVICE_ID_REDHAT_SERIAL4     0x0004
+#define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
 #define FMT_PCIBUS                       PRIx64

@@ -27,10 +27,6 @@
 #include "exec/memory.h"
 #include "pci/pci_bus.h"
 
-#define REDHAT_PCI_VENDOR_ID 0x1b36
-#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID
-#define PCI_BRIDGE_DEV_DEVICE_ID 0x1
-
 struct PCIBridgeDev {
     PCIBridge bridge;
     MemoryRegion bar;
@@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
     k->init = pci_bridge_dev_initfn;
     k->exit = pci_bridge_dev_exitfn;
     k->config_write = pci_bridge_dev_write_config;
-    k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID;
-    k->device_id = PCI_BRIDGE_DEV_DEVICE_ID;
+    k->vendor_id = PCI_VENDOR_ID_REDHAT;
+    k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
     k->class_id = PCI_CLASS_BRIDGE_PCI;
     k->is_bridge = 1,
     dc->desc = "Standard PCI Bridge";

@@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = serial_pci_init;
     pc->exit = serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0002;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_serial;
@@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = multi_serial_pci_init;
     pc->exit = multi_serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0003;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_multi_serial;
@@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = multi_serial_pci_init;
     pc->exit = multi_serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0004;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_multi_serial;

@@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener,
     /* FIXME: implement */
 }
 
-static int vhost_virtqueue_init(struct vhost_dev *dev,
+static int vhost_virtqueue_start(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
@@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
         goto fail_kick;
     }
 
-    file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
-    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
-    if (r) {
-        r = -errno;
-        goto fail_call;
-    }
+    /* Clear and discard previous events if any. */
+    event_notifier_test_and_clear(&vq->masked_notifier);
 
     return 0;
 
-fail_call:
 fail_kick:
 fail_alloc:
     cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
@@ -708,7 +703,7 @@ fail_alloc_desc:
     return r;
 }
 
-static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
+static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                     struct VirtIODevice *vdev,
                                     struct vhost_virtqueue *vq,
                                     unsigned idx)
@@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener,
 {
 }
 
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+                                struct vhost_virtqueue *vq, int n)
+{
+    struct vhost_vring_file file = {
+        .index = n,
+    };
+    int r = event_notifier_init(&vq->masked_notifier, 0);
+    if (r < 0) {
+        return r;
+    }
+
+    file.fd = event_notifier_get_fd(&vq->masked_notifier);
+    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
+
+    return 0;
+fail_call:
+    event_notifier_cleanup(&vq->masked_notifier);
+    return r;
+}
+
+static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
+{
+    event_notifier_cleanup(&vq->masked_notifier);
+}
+
 int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
                    bool force)
 {
     uint64_t features;
-    int r;
+    int i, r;
     if (devfd >= 0) {
         hdev->control = devfd;
     } else {
@@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
     if (r < 0) {
         goto fail;
     }
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
+        if (r < 0) {
+            goto fail_vq;
+        }
+    }
+
     hdev->features = features;
 
     hdev->memory_listener = (MemoryListener) {
@@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
     memory_listener_register(&hdev->memory_listener, &address_space_memory);
     hdev->force = force;
     return 0;
+fail_vq:
+    while (--i >= 0) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
 fail:
     r = -errno;
     close(hdev->control);
@@ -803,6 +837,10 @@ fail:
 void vhost_dev_cleanup(struct vhost_dev *hdev)
 {
     int i;
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
     memory_listener_unregister(&hdev->memory_listener);
     g_free(hdev->mem);
     g_free(hdev->mem_sections);
@@ -869,17 +907,53 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
     }
 }
 
+/* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
+{
+    struct vhost_virtqueue *vq = hdev->vqs + n;
+    assert(hdev->started);
+    return event_notifier_test_and_clear(&vq->masked_notifier);
+}
+
+/* Mask/unmask events from this vq. */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                          bool mask)
+{
+    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
+    int r;
+
+    assert(hdev->started);
+
+    struct vhost_vring_file file = {
+        .index = n,
+    };
+    if (mask) {
+        file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier);
+    } else {
+        file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
+    }
+    r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+    assert(r >= 0);
+}
+
 /* Host notifiers must be enabled at this point. */
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int i, r;
+
+    hdev->started = true;
+
     if (!vdev->binding->set_guest_notifiers) {
         fprintf(stderr, "binding does not support guest notifiers\n");
         r = -ENOSYS;
         goto fail;
     }
 
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
+    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+                                           hdev->nvqs,
+                                           true);
     if (r < 0) {
         fprintf(stderr, "Error binding guest notifier: %d\n", -r);
         goto fail_notifiers;
@@ -895,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         goto fail_mem;
     }
     for (i = 0; i < hdev->nvqs; ++i) {
-        r = vhost_virtqueue_init(hdev,
+        r = vhost_virtqueue_start(hdev,
                                  vdev,
                                  hdev->vqs + i,
                                  i);
@@ -916,22 +990,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         }
     }
 
-    hdev->started = true;
-
     return 0;
+
 fail_log:
 fail_vq:
     while (--i >= 0) {
-        vhost_virtqueue_cleanup(hdev,
+        vhost_virtqueue_stop(hdev,
                              vdev,
                              hdev->vqs + i,
                              i);
     }
 fail_mem:
 fail_features:
-    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
 fail_notifiers:
 fail:
+    hdev->started = false;
     return r;
 }
@@ -941,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
     int i, r;
 
     for (i = 0; i < hdev->nvqs; ++i) {
-        vhost_virtqueue_cleanup(hdev,
+        vhost_virtqueue_stop(hdev,
                              vdev,
                              hdev->vqs + i,
                              i);
@@ -950,7 +1024,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
         vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
                                 0, (hwaddr)~0x0ull);
     }
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+                                           hdev->nvqs,
+                                           false);
     if (r < 0) {
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
         fflush(stderr);
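
The rule stated in the comments above (test and clear pending status only
after unmasking, otherwise an event that fired while masked is lost) can be
demonstrated outside QEMU with a bare eventfd standing in for the masked
notifier; a Linux-only sketch, not QEMU code:

/* Sketch only, not part of the commit. */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
    /* Stand-in for vhost_virtqueue.masked_notifier. */
    int masked_notifier = eventfd(0, EFD_NONBLOCK);
    uint64_t val = 1;

    /* Backend signals while the vq is masked: the event parks here
     * instead of being injected into the guest. */
    write(masked_notifier, &val, sizeof(val));

    /* Unmask: first point the backend back at the guest notifier
     * (elided), then test-and-clear the parked state and forward it.
     * Doing the test before unmasking would race with new events. */
    if (read(masked_notifier, &val, sizeof(val)) == sizeof(val)) {
        printf("event was pending while masked; inject it now\n");
    }

    close(masked_notifier);
    return 0;
}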

@@ -18,6 +18,7 @@ struct vhost_virtqueue {
     void *ring;
     unsigned long long ring_phys;
     unsigned ring_size;
+    EventNotifier masked_notifier;
 };
 
 typedef unsigned long vhost_log_chunk_t;
@@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 
+/* Test and clear masked event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
+
+/* Mask/unmask events from this vq.
+ */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                          bool mask);
 #endif

@@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
         (1 << VHOST_NET_F_VIRTIO_NET_HDR);
     net->backend = r;
 
+    net->dev.nvqs = 2;
+    net->dev.vqs = net->vqs;
+
     r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
     if (r < 0) {
         goto fail;
@@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net,
     struct vhost_vring_file file = { };
     int r;
 
-    net->dev.nvqs = 2;
-    net->dev.vqs = net->vqs;
-
     r = vhost_dev_enable_notifiers(&net->dev, dev);
     if (r < 0) {
         goto fail_notifiers;
@@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net)
     vhost_dev_cleanup(&net->dev);
     g_free(net);
 }
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+    return vhost_virtqueue_pending(&net->dev, idx);
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask)
+{
+    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
+}
 #else
 struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
                                  bool force)
@@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
 void vhost_net_ack_features(struct vhost_net *net, unsigned features)
 {
 }
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+    return -ENOSYS;
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask)
+{
+}
 #endif

@@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net);
 unsigned vhost_net_get_features(VHostNetState *net, unsigned features);
 void vhost_net_ack_features(VHostNetState *net, unsigned features);
 
+bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask);
 #endif

@@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
         if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
             return;
         }
+        n->vhost_started = 1;
         r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
         if (r < 0) {
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
-        } else {
-            n->vhost_started = 1;
+            n->vhost_started = 0;
         }
     } else {
         vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
@@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = {
     .link_status_changed = virtio_net_set_link_status,
 };
 
+static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    assert(n->vhost_started);
+    return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
+}
+
+static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                           bool mask)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    assert(n->vhost_started);
+    vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
+                             vdev, idx, mask);
+}
+
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                               virtio_net_conf *net)
 {
@@ -1026,6 +1042,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->vdev.bad_features = virtio_net_bad_features;
     n->vdev.reset = virtio_net_reset;
     n->vdev.set_status = virtio_net_set_status;
+    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
+    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
     n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
 
     if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {

@@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                         unsigned int vector,
                                         MSIMessage msg)
 {
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
     int ret;
@@ -500,20 +498,33 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
         irqfd->virq = ret;
     }
     irqfd->users++;
-
-    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
-    if (ret < 0) {
-        if (--irqfd->users == 0) {
-            kvm_irqchip_release_virq(kvm_state, irqfd->virq);
-        }
-        return ret;
-    }
     return 0;
 }
 
 static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
-                                             unsigned int queue_no,
                                              unsigned int vector)
 {
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    if (--irqfd->users == 0) {
+        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+    }
+}
+
+static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
+                                    unsigned int queue_no,
+                                    unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    int ret;
+    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
+    return ret;
+}
+
+static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
+                                         unsigned int queue_no,
+                                         unsigned int vector)
+{
     VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
     EventNotifier *n = virtio_queue_get_guest_notifier(vq);
@@ -522,27 +533,143 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
     ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
     assert(ret == 0);
 }
-    if (--irqfd->users == 0) {
-        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
-    }
-}
 
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = proxy->vdev;
+    unsigned int vector;
+    int ret, queue_no;
+    MSIMessage msg;
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        msg = msix_get_message(dev, vector);
+        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        if (ret < 0) {
+            goto undo;
+        }
+        /* If guest supports masking, set up irqfd now.
+         * Otherwise, delay until unmasked in the frontend.
+         */
+        if (proxy->vdev->guest_notifier_mask) {
+            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+            if (ret < 0) {
+                kvm_virtio_pci_vq_vector_release(proxy, vector);
+                goto undo;
+            }
+        }
+    }
+    return 0;
+
+undo:
+    while (--queue_no >= 0) {
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        if (proxy->vdev->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+    return ret;
+}
+
+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = proxy->vdev;
+    unsigned int vector;
+    int queue_no;
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        /* If guest supports masking, clean up irqfd now.
+         * Otherwise, it was cleaned when masked in the frontend.
+         */
+        if (proxy->vdev->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+}
+
-static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
+static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+                                           unsigned int queue_no,
+                                           unsigned int vector,
                                            MSIMessage msg)
 {
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int ret;
+
+    if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
+        ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* If guest supports masking, irqfd is already setup, unmask it.
+     * Otherwise, set it up now.
+     */
+    if (proxy->vdev->guest_notifier_mask) {
+        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
+        /* Test after unmasking to avoid losing events. */
+        if (proxy->vdev->guest_notifier_pending &&
+            proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
+            event_notifier_set(n);
+        }
+    } else {
+        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+    }
+
+    return ret;
+}
+
+static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
+                                          unsigned int queue_no,
+                                          unsigned int vector)
+{
+    /* If guest supports masking, keep irqfd but mask it.
+     * Otherwise, clean it up now.
+     */
+    if (proxy->vdev->guest_notifier_mask) {
+        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
+    } else {
+        kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+    }
+}
+
+static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                         MSIMessage msg)
 {
     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
     int ret, queue_no;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg);
         if (ret < 0) {
             goto undo;
         }
@@ -554,25 +681,25 @@ undo:
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+        kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
     }
     return ret;
 }
 
-static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
+static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
 {
     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
     int queue_no;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+        kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
     }
 }
@@ -587,7 +714,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
     EventNotifier *notifier;
     VirtQueue *vq;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
@@ -598,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
         }
         vq = virtio_get_queue(vdev, queue_no);
         notifier = virtio_queue_get_guest_notifier(vq);
-        if (event_notifier_test_and_clear(notifier)) {
+        if (vdev->guest_notifier_pending) {
+            if (vdev->guest_notifier_pending(vdev, queue_no)) {
+                msix_set_pending(dev, vector);
+            }
+        } else if (event_notifier_test_and_clear(notifier)) {
             msix_set_pending(dev, vector);
         }
     }
@@ -631,7 +762,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d)
     return msix_enabled(&proxy->pci_dev);
 }
 
-static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
+static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
 {
     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtIODevice *vdev = proxy->vdev;
@@ -639,14 +770,24 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
     bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
         kvm_msi_via_irqfd_enabled();
 
+    nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);
+
+    /* When deassigning, pass a consistent nvqs value
+     * to avoid leaking notifiers.
+     */
+    assert(assign || nvqs == proxy->nvqs_with_notifiers);
+
+    proxy->nvqs_with_notifiers = nvqs;
+
     /* Must unset vector notifier while guest notifier is still assigned */
     if (proxy->vector_irqfd && !assign) {
         msix_unset_vector_notifiers(&proxy->pci_dev);
+        kvm_virtio_pci_vector_release(proxy, nvqs);
         g_free(proxy->vector_irqfd);
         proxy->vector_irqfd = NULL;
     }
 
-    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+    for (n = 0; n < nvqs; n++) {
         if (!virtio_queue_get_num(vdev, n)) {
             break;
         }
@@ -663,17 +804,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
         proxy->vector_irqfd =
             g_malloc0(sizeof(*proxy->vector_irqfd) *
                       msix_nr_vectors_allocated(&proxy->pci_dev));
-        r = msix_set_vector_notifiers(&proxy->pci_dev,
-                                      kvm_virtio_pci_vector_use,
-                                      kvm_virtio_pci_vector_release,
-                                      kvm_virtio_pci_vector_poll);
+        r = kvm_virtio_pci_vector_use(proxy, nvqs);
         if (r < 0) {
             goto assign_error;
         }
+        r = msix_set_vector_notifiers(&proxy->pci_dev,
+                                      kvm_virtio_pci_vector_unmask,
+                                      kvm_virtio_pci_vector_mask,
+                                      kvm_virtio_pci_vector_poll);
+        if (r < 0) {
+            goto notifiers_error;
+        }
     }
 
     return 0;
 
+notifiers_error:
+    assert(assign);
+    kvm_virtio_pci_vector_release(proxy, nvqs);
+
 assign_error:
     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
     assert(assign);

@@ -27,6 +27,7 @@
 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
 
 typedef struct {
+    MSIMessage msg;
     int virq;
     unsigned int users;
 } VirtIOIRQFD;
@@ -51,6 +52,7 @@ typedef struct {
     bool ioeventfd_disabled;
     bool ioeventfd_started;
     VirtIOIRQFD *vector_irqfd;
+    int nvqs_with_notifiers;
 } VirtIOPCIProxy;
 
 void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);

@@ -99,7 +99,7 @@ typedef struct {
     int (*load_done)(DeviceState *d, QEMUFile *f);
     unsigned (*get_features)(DeviceState *d);
     bool (*query_guest_notifiers)(DeviceState *d);
-    int (*set_guest_notifiers)(DeviceState *d, bool assigned);
+    int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned);
     int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
    void (*vmstate_change)(DeviceState *d, bool running);
 } VirtIOBindings;
@@ -126,6 +126,19 @@ struct VirtIODevice
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
     void (*set_status)(VirtIODevice *vdev, uint8_t val);
+    /* Test and clear event pending status.
+     * Should be called after unmask to avoid losing events.
+     * If backend does not support masking,
+     * must check in frontend instead.
+     */
+    bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
+    /* Mask/unmask events from this vq. Any events reported
+     * while masked will become pending.
+     * If backend does not support masking,
+     * must mask in frontend instead.
+     */
+    void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
     VirtQueue *vq;
     const VirtIOBindings *binding;
     DeviceState *binding_opaque;

@@ -131,6 +131,11 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
 {
 }
 
+int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
+{
+    return -ENOSYS;
+}
+
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
 {
     return -ENOSYS;

pci-ids.txt (deleted, moved to docs/specs/)

@@ -1,31 +0,0 @@
-PCI IDs for qemu
-================
-
-Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
-virtual devices.  The vendor ID is 1af4 (formerly Qumranet ID).
-
-The 1000 -> 10ff device ID range is used for VirtIO devices.
-
-The 1100 device ID is used as PCI Subsystem ID for existing hardware
-devices emulated by qemu.
-
-All other device IDs are reserved.
-
-VirtIO Device IDs
------------------
-
-1af4:1000  network device
-1af4:1001  block device
-1af4:1002  balloon device
-1af4:1003  console device
-
-1af4:1004  Reserved.
-   to      Contact Gerd Hoffmann <kraxel@redhat.com> to get a
-1af4:10ef  device ID assigned for your new virtio device.
-
-1af4:10f0  Available for experimental usage without registration.  Must get
-   to      official ID when the code leaves the test lab (i.e. when seeking
-1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.