virtio, vhost, pc: fixes

Most notably this fixes a regression with vhost introduced by the pull before
 last.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJYLyObAAoJECgfDbjSjVRptPoIAK/4SdEAqS9pnXPekPZpIddV
 KHCFjj4Q68s22i0jpA1hxSXk1yQZIl56dnynU0DIAbCD1NYQIEmWx7uOJjppre9O
 L64V2s2ItEagFBGFwQDoJnUDIyEhth8KRqsa36V2YWJXYOaH1Rx1QNb9tX9R0aeb
 2lVwYE+yig1Gc/2PAYJrcKWwM3iwWrYW6ssycP2LEOGOhBCIrGZwDJkqv7ayDVL9
 j4tH2eBRrOAzm8c3fybC3OZkeLqcQJnbVONmD8kV0Q0IphcFvloJQCvcefb/3Ox1
 HAz57JxZfpxMZPVtvgU8Q+xzElz8noCXg+6lF/dx71CKicwXxg4lsMF1LyKHUoU=
 =cuAn
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'mst/tags/for_upstream' into staging

virtio, vhost, pc: fixes

Most notably this fixes a regression with vhost introduced by the pull before
last.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 18 Nov 2016 03:51:55 PM GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* mst/tags/for_upstream:
  acpi: Use apic_id_limit when calculating legacy ACPI table size
  ipmi: fix qemu crash while migrating with ipmi
  ivshmem: Fix 64 bit memory bar configuration
  virtio: set ISR on dataplane notifications
  virtio: access ISR atomically
  virtio: introduce grab/release_ioeventfd to fix vhost
  virtio-crypto: fix virtio_queue_set_notification() race

Message-id: 1479484366-7977-1-git-send-email-mst@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2016-11-21 11:09:58 +00:00
commit c36ed06e91
15 changed files with 151 additions and 57 deletions

View file

@ -68,9 +68,7 @@ static void notify_guest_bh(void *opaque)
unsigned i = j + ctzl(bits);
VirtQueue *vq = virtio_get_queue(s->vdev, i);
if (virtio_should_notify(s->vdev, vq)) {
event_notifier_set(virtio_queue_get_guest_notifier(vq));
}
virtio_notify_irqfd(s->vdev, vq);
bits &= bits - 1; /* clear right-most bit */
}

View file

@ -2860,7 +2860,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
*/
int legacy_aml_len =
pcmc->legacy_acpi_table_size +
ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus;
ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit;
int legacy_table_size =
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
ACPI_BUILD_ALIGN_SIZE);

View file

@ -433,10 +433,8 @@ const VMStateDescription vmstate_ISAIPMIKCSDevice = {
VMSTATE_BOOL(kcs.use_irq, ISAIPMIKCSDevice),
VMSTATE_BOOL(kcs.irqs_enabled, ISAIPMIKCSDevice),
VMSTATE_UINT32(kcs.outpos, ISAIPMIKCSDevice),
VMSTATE_VBUFFER_UINT32(kcs.outmsg, ISAIPMIKCSDevice, 1, NULL, 0,
kcs.outlen),
VMSTATE_VBUFFER_UINT32(kcs.inmsg, ISAIPMIKCSDevice, 1, NULL, 0,
kcs.inlen),
VMSTATE_UINT8_ARRAY(kcs.outmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE),
VMSTATE_UINT8_ARRAY(kcs.inmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE),
VMSTATE_BOOL(kcs.write_end, ISAIPMIKCSDevice),
VMSTATE_UINT8(kcs.status_reg, ISAIPMIKCSDevice),
VMSTATE_UINT8(kcs.data_out_reg, ISAIPMIKCSDevice),

View file

@ -95,13 +95,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
return 0;
}
/*
 * Signal the guest notifier of the virtqueue that carried @req, but only
 * when virtio_should_notify() says a notification is needed for it.
 */
void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req)
{
    if (!virtio_should_notify(vdev, req->vq)) {
        return;
    }

    event_notifier_set(virtio_queue_get_guest_notifier(req->vq));
}
/* assumes s->ctx held */
static void virtio_scsi_clear_aio(VirtIOSCSI *s)
{

View file

@ -69,7 +69,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
if (s->dataplane_started && !s->dataplane_fenced) {
virtio_scsi_dataplane_notify(vdev, req);
virtio_notify_irqfd(vdev, vq);
} else {
virtio_notify(vdev, vq);
}

View file

@ -5,7 +5,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "
virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
virtio_irq(void *vq) "vq %p"
virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"

View file

@ -1214,17 +1214,17 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusState *vbus = VIRTIO_BUS(qbus);
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
int i, r, e;
if (!k->ioeventfd_assign) {
/* We will pass the notifiers to the kernel, make sure that QEMU
* doesn't interfere.
*/
r = virtio_device_grab_ioeventfd(vdev);
if (r < 0) {
error_report("binding does not support host notifiers");
r = -ENOSYS;
goto fail;
}
virtio_device_stop_ioeventfd(vdev);
for (i = 0; i < hdev->nvqs; ++i) {
r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
true);
@ -1244,7 +1244,7 @@ fail_vq:
}
assert (e >= 0);
}
virtio_device_start_ioeventfd(vdev);
virtio_device_release_ioeventfd(vdev);
fail:
return r;
}
@ -1267,7 +1267,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
}
assert (r >= 0);
}
virtio_device_start_ioeventfd(vdev);
virtio_device_release_ioeventfd(vdev);
}
/* Test and clear event pending status.

View file

@ -147,6 +147,39 @@ void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config)
}
}
/*
 * Grab the bus's ioeventfd.
 *
 * Returns 0 on success, in which case ioeventfd ownership belongs to the
 * caller, or -ENOSYS when the bus class has no ioeventfd_assign hook.
 * Each successful call increments bus->ioeventfd_grabbed.
 */
int virtio_bus_grab_ioeventfd(VirtioBusState *bus)
{
    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);

    /* vhost can be used even if ioeventfd=off in the proxy device,
     * so the class's ioeventfd_enabled hook is deliberately not checked.
     */
    if (!klass->ioeventfd_assign) {
        return -ENOSYS;
    }

    /* Only the first grab has to stop a running ioeventfd. */
    if (bus->ioeventfd_grabbed == 0) {
        if (bus->ioeventfd_started) {
            virtio_bus_stop_ioeventfd(bus);
            /* Stopping cleared the flag; set it again to remember that
             * ioeventfd must be restarted once ioeventfd_grabbed drops
             * back to zero.
             */
            bus->ioeventfd_started = true;
        }
    }

    bus->ioeventfd_grabbed += 1;
    return 0;
}
/*
 * Drop one grab on the bus's ioeventfd.
 *
 * The grab count must be nonzero on entry.  When the last grab goes away
 * and the started flag is still set, ioeventfd is started again.
 */
void virtio_bus_release_ioeventfd(VirtioBusState *bus)
{
    assert(bus->ioeventfd_grabbed != 0);

    bus->ioeventfd_grabbed -= 1;
    if (bus->ioeventfd_grabbed != 0 || !bus->ioeventfd_started) {
        return;
    }

    /* Clear the flag first: virtio_bus_start_ioeventfd() returns early
     * when it finds ioeventfd_started already set.
     */
    bus->ioeventfd_started = false;
    virtio_bus_start_ioeventfd(bus);
}
int virtio_bus_start_ioeventfd(VirtioBusState *bus)
{
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
@ -161,10 +194,14 @@ int virtio_bus_start_ioeventfd(VirtioBusState *bus)
if (bus->ioeventfd_started) {
return 0;
}
r = vdc->start_ioeventfd(vdev);
if (r < 0) {
error_report("%s: failed. Fallback to userspace (slower).", __func__);
return r;
/* Only set our notifier if we have ownership. */
if (!bus->ioeventfd_grabbed) {
r = vdc->start_ioeventfd(vdev);
if (r < 0) {
error_report("%s: failed. Fallback to userspace (slower).", __func__);
return r;
}
}
bus->ioeventfd_started = true;
return 0;
@ -179,9 +216,12 @@ void virtio_bus_stop_ioeventfd(VirtioBusState *bus)
return;
}
vdev = virtio_bus_get_device(bus);
vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
vdc->stop_ioeventfd(vdev);
/* Only remove our notifier if we have ownership. */
if (!bus->ioeventfd_grabbed) {
vdev = virtio_bus_get_device(bus);
vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
vdc->stop_ioeventfd(vdev);
}
bus->ioeventfd_started = false;
}
@ -211,7 +251,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
}
if (assign) {
assert(!bus->ioeventfd_started);
r = event_notifier_init(notifier, 1);
if (r < 0) {
error_report("%s: unable to init event notifier: %s (%d)",
@ -225,9 +264,6 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
}
return 0;
} else {
if (!bus->ioeventfd_started) {
return 0;
}
k->ioeventfd_assign(proxy, notifier, n, false);
}

View file

@ -692,8 +692,17 @@ static void virtio_crypto_dataq_bh(void *opaque)
return;
}
virtio_crypto_handle_dataq(vdev, q->dataq);
virtio_queue_set_notification(q->dataq, 1);
for (;;) {
virtio_crypto_handle_dataq(vdev, q->dataq);
virtio_queue_set_notification(q->dataq, 1);
/* Are we done or did the guest add more buffers? */
if (virtio_queue_empty(q->dataq)) {
break;
}
virtio_queue_set_notification(q->dataq, 0);
}
}
static void

View file

@ -191,7 +191,7 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
return virtio_queue_get_addr(vdev, vdev->queue_sel)
>> proxy->guest_page_shift;
case VIRTIO_MMIO_INTERRUPTSTATUS:
return vdev->isr;
return atomic_read(&vdev->isr);
case VIRTIO_MMIO_STATUS:
return vdev->status;
case VIRTIO_MMIO_HOSTFEATURESSEL:
@ -299,7 +299,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
}
break;
case VIRTIO_MMIO_INTERRUPTACK:
vdev->isr &= ~value;
atomic_and(&vdev->isr, ~value);
virtio_update_irq(vdev);
break;
case VIRTIO_MMIO_STATUS:
@ -347,7 +347,7 @@ static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
if (!vdev) {
return;
}
level = (vdev->isr != 0);
level = (atomic_read(&vdev->isr) != 0);
DPRINTF("virtio_mmio setting IRQ %d\n", level);
qemu_set_irq(proxy->irq, level);
}

View file

@ -73,7 +73,7 @@ static void virtio_pci_notify(DeviceState *d, uint16_t vector)
msix_notify(&proxy->pci_dev, vector);
else {
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
pci_set_irq(&proxy->pci_dev, atomic_read(&vdev->isr) & 1);
}
}
@ -449,8 +449,7 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
break;
case VIRTIO_PCI_ISR:
/* reading from the ISR also clears it. */
ret = vdev->isr;
vdev->isr = 0;
ret = atomic_xchg(&vdev->isr, 0);
pci_irq_deassert(&proxy->pci_dev);
break;
case VIRTIO_MSI_CONFIG_VECTOR:
@ -1379,9 +1378,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
{
VirtIOPCIProxy *proxy = opaque;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
uint64_t val = vdev->isr;
vdev->isr = 0;
uint64_t val = atomic_xchg(&vdev->isr, 0);
pci_irq_deassert(&proxy->pci_dev);
return val;

View file

@ -945,7 +945,7 @@ void virtio_reset(void *opaque)
vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->status = 0;
vdev->isr = 0;
atomic_set(&vdev->isr, 0);
vdev->config_vector = VIRTIO_NO_VECTOR;
virtio_notify_vector(vdev, vdev->config_vector);
@ -1318,11 +1318,16 @@ void virtio_del_queue(VirtIODevice *vdev, int n)
vdev->vq[n].vring.num_default = 0;
}
void virtio_irq(VirtQueue *vq)
static void virtio_set_isr(VirtIODevice *vdev, int value)
{
trace_virtio_irq(vq);
vq->vdev->isr |= 0x01;
virtio_notify_vector(vq->vdev, vq->vector);
uint8_t old = atomic_read(&vdev->isr);
/* Do not write ISR if it does not change, so that its cacheline remains
* shared in the common case where the guest does not read it.
*/
if ((old & value) != value) {
atomic_or(&vdev->isr, value);
}
}
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
@ -1348,6 +1353,33 @@ bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
/*
 * Notify the guest about @vq through the queue's guest notifier.
 *
 * Nothing happens unless virtio_should_notify() asks for a notification.
 * Otherwise ISR bit 0 is set and the guest notifier is signalled.
 */
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtio_should_notify(vdev, vq)) {
        trace_virtio_notify_irqfd(vdev, vq);

        /*
         * The virtio 1.0 spec says ISR bit 0 should be ignored with MSI,
         * but the Windows drivers shipped in virtio-win 1.8.0 (circa 2015)
         * incorrectly poll this bit during crashdump and hibernation in
         * MSI mode, and hang if the bit is never updated.  Recent Windows
         * releases do not really shut down, but rather log out and
         * hibernate to make the next startup faster, so the problem showed
         * up as a more serious hang during shutdown with the affected
         * drivers.
         *
         * The next driver release, from 2016, fixed the problem, so this
         * workaround is not a must, but it is easy to do, so do it here.
         *
         * Note: updating ISR from any thread is safe because it was
         * switched to an atomic operation.
         */
        virtio_set_isr(vq->vdev, 0x1);
        event_notifier_set(&vq->guest_notifier);
    }
}
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
if (!virtio_should_notify(vdev, vq)) {
@ -1355,7 +1387,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
}
trace_virtio_notify(vdev, vq);
vdev->isr |= 0x01;
virtio_set_isr(vq->vdev, 0x1);
virtio_notify_vector(vdev, vq->vector);
}
@ -1364,7 +1396,7 @@ void virtio_notify_config(VirtIODevice *vdev)
if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
vdev->isr |= 0x03;
virtio_set_isr(vdev, 0x3);
vdev->generation++;
virtio_notify_vector(vdev, vdev->config_vector);
}
@ -1895,7 +1927,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->device_id = device_id;
vdev->status = 0;
vdev->isr = 0;
atomic_set(&vdev->isr, 0);
vdev->queue_sel = 0;
vdev->config_vector = VIRTIO_NO_VECTOR;
vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
@ -1982,7 +2014,7 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
if (event_notifier_test_and_clear(n)) {
virtio_irq(vq);
virtio_notify_vector(vq->vdev, vq->vector);
}
}
@ -2191,6 +2223,22 @@ void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
virtio_bus_stop_ioeventfd(vbus);
}
/*
 * Device-level wrapper: look up the bus @vdev sits on and forward to
 * virtio_bus_grab_ioeventfd(), returning its result unchanged.
 */
int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    return virtio_bus_grab_ioeventfd(
        VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))));
}
/*
 * Device-level wrapper: look up the bus @vdev sits on and forward to
 * virtio_bus_release_ioeventfd().
 */
void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    virtio_bus_release_ioeventfd(
        VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))));
}
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
/* Set the default value here. */

View file

@ -97,6 +97,16 @@ struct VirtioBusState {
* Set if ioeventfd has been started.
*/
bool ioeventfd_started;
/*
* Nonzero while ioeventfd is grabbed by vhost (grab count). When ioeventfd
* is grabbed by vhost, we track its started/stopped state (which
* depends in turn on the virtio status register), but do not
* register a handler for the ioeventfd. When ioeventfd is
* released, if ioeventfd_started is true we finally register
* the handler so that QEMU's device model can use ioeventfd.
*/
int ioeventfd_grabbed;
};
void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp);
@ -131,6 +141,10 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus);
int virtio_bus_start_ioeventfd(VirtioBusState *bus);
/* Stop the ioeventfd. */
void virtio_bus_stop_ioeventfd(VirtioBusState *bus);
/* Tell the bus that vhost is grabbing the ioeventfd. */
int virtio_bus_grab_ioeventfd(VirtioBusState *bus);
/* Tell the bus that vhost is not using the ioeventfd anymore. */
void virtio_bus_release_ioeventfd(VirtioBusState *bus);
/* Switch from/to the generic ioeventfd handler */
int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);

View file

@ -137,6 +137,5 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
int virtio_scsi_dataplane_start(VirtIODevice *s);
void virtio_scsi_dataplane_stop(VirtIODevice *s);
void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req);
#endif /* QEMU_VIRTIO_SCSI_H */

View file

@ -181,6 +181,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
unsigned max_in_bytes, unsigned max_out_bytes);
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq);
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
void virtio_save(VirtIODevice *vdev, QEMUFile *f);
@ -272,13 +273,14 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
bool with_irqfd);
int virtio_device_start_ioeventfd(VirtIODevice *vdev);
void virtio_device_stop_ioeventfd(VirtIODevice *vdev);
int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
void virtio_device_release_ioeventfd(VirtIODevice *vdev);
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
void virtio_queue_host_notifier_read(EventNotifier *n);
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
void (*fn)(VirtIODevice *,
VirtQueue *));
void virtio_irq(VirtQueue *vq);
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
VirtQueue *virtio_vector_next_queue(VirtQueue *vq);