pci, pc fixes, features

A bunch of bugfixes - these will make sense for 2.1.1
 
 Initial Intel IOMMU support.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJUBdygAAoJECgfDbjSjVRpa9cIAJS06we0CpJaVmPrQS5HvC1w
 An5Y5bGdfMQtfKjqN1Kehmtu/+wjNKZJw427+6B+KNO7wm9rRUiu927qp9lNGlbH
 g3ybrknKYeyqVO/43SJt8c1eODSkmNgHPqyCkRVLbriYo850b2HhjJyMvVNZqeHD
 zuTmU95GTNeiYAV8J1c59OrqUz302kCXI4A47loY7LdoEFMbJat4DbkrkspuTgbQ
 EVk5sR8p2atKzgaOV6M6yiAtL5uSBNr9KmHvuA7ZBiV21wmOJm5u3y6DpLczUD90
 +Ln6BCjmPS5GQ12pzY7U65enr/x/RYo6k01ig9MP3TndNA02XxCaskqfd083jM8=
 =4drK
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

pci, pc fixes, features

A bunch of bugfixes - these will make sense for 2.1.1

Initial Intel IOMMU support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 02 Sep 2014 16:05:04 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream:
  vhost_net: start/stop guest notifiers properly
  pci: avoid losing config updates to MSI/MSIX cap regs
  virtio-net: don't run bh on vm stopped
  ioh3420: remove unused ioh3420_init() declaration
  vhost_net: cleanup start/stop condition
  intel-iommu: add IOTLB using hash table
  intel-iommu: add context-cache to cache context-entry
  intel-iommu: add supports for queued invalidation interface
  intel-iommu: fix coding style issues around in q35.c and machine.c
  intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch
  intel-iommu: add DMAR table to ACPI tables
  intel-iommu: introduce Intel IOMMU (VT-d) emulation
  iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2014-09-02 16:07:31 +01:00
commit f2426947de
22 changed files with 2683 additions and 45 deletions

2
exec.c
View file

@ -373,7 +373,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
break; break;
} }
iotlb = mr->iommu_ops->translate(mr, addr); iotlb = mr->iommu_ops->translate(mr, addr, is_write);
addr = ((iotlb.translated_addr & ~iotlb.addr_mask) addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
| (addr & iotlb.addr_mask)); | (addr & iotlb.addr_mask));
len = MIN(len, (addr | iotlb.addr_mask) - addr + 1); len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);

View file

@ -660,7 +660,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr,
/* Handle PCI-to-system address translation. */ /* Handle PCI-to-system address translation. */
/* TODO: A translation failure here ought to set PCI error codes on the /* TODO: A translation failure here ought to set PCI error codes on the
Pchip and generate a machine check interrupt. */ Pchip and generate a machine check interrupt. */
static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr) static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr,
bool is_write)
{ {
TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu); TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu);
IOMMUTLBEntry ret; IOMMUTLBEntry ret;

View file

@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char *value, Error **errp)
ms->firmware = g_strdup(value); ms->firmware = g_strdup(value);
} }
static bool machine_get_iommu(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
return ms->iommu;
}
static void machine_set_iommu(Object *obj, bool value, Error **errp)
{
MachineState *ms = MACHINE(obj);
ms->iommu = value;
}
static void machine_initfn(Object *obj) static void machine_initfn(Object *obj)
{ {
object_property_add_str(obj, "accel", object_property_add_str(obj, "accel",
@ -270,10 +284,17 @@ static void machine_initfn(Object *obj)
machine_set_dump_guest_core, machine_set_dump_guest_core,
NULL); NULL);
object_property_add_bool(obj, "mem-merge", object_property_add_bool(obj, "mem-merge",
machine_get_mem_merge, machine_set_mem_merge, NULL); machine_get_mem_merge,
object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, NULL); machine_set_mem_merge, NULL);
object_property_add_bool(obj, "usb",
machine_get_usb,
machine_set_usb, NULL);
object_property_add_str(obj, "firmware", object_property_add_str(obj, "firmware",
machine_get_firmware, machine_set_firmware, NULL); machine_get_firmware,
machine_set_firmware, NULL);
object_property_add_bool(obj, "iommu",
machine_get_iommu,
machine_set_iommu, NULL);
} }
static void machine_finalize(Object *obj) static void machine_finalize(Object *obj)

View file

@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
obj-y += multiboot.o smbios.o obj-y += multiboot.o smbios.o
obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc.o pc_piix.o pc_q35.o
obj-y += pc_sysfw.o obj-y += pc_sysfw.o
obj-y += intel_iommu.o
obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-$(CONFIG_XEN) += ../xenpv/ xen/
obj-y += kvmvapic.o obj-y += kvmvapic.o

View file

@ -49,6 +49,7 @@
#include "hw/i386/ich9.h" #include "hw/i386/ich9.h"
#include "hw/pci/pci_bus.h" #include "hw/pci/pci_bus.h"
#include "hw/pci-host/q35.h" #include "hw/pci-host/q35.h"
#include "hw/i386/intel_iommu.h"
#include "hw/i386/q35-acpi-dsdt.hex" #include "hw/i386/q35-acpi-dsdt.hex"
#include "hw/i386/acpi-dsdt.hex" #include "hw/i386/acpi-dsdt.hex"
@ -1387,6 +1388,30 @@ build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info)
build_header(linker, table_data, (void *)mcfg, sig, len, 1); build_header(linker, table_data, (void *)mcfg, sig, len, 1);
} }
static void
build_dmar_q35(GArray *table_data, GArray *linker)
{
int dmar_start = table_data->len;
AcpiTableDmar *dmar;
AcpiDmarHardwareUnit *drhd;
dmar = acpi_data_push(table_data, sizeof(*dmar));
dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
dmar->flags = 0; /* No intr_remap for now */
/* DMAR Remapping Hardware Unit Definition structure */
drhd = acpi_data_push(table_data, sizeof(*drhd));
drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */
drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
drhd->pci_segment = cpu_to_le16(0);
drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
build_header(linker, table_data, (void *)(table_data->data + dmar_start),
"DMAR", table_data->len - dmar_start, 1);
}
static void static void
build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc)
{ {
@ -1508,6 +1533,16 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
return true; return true;
} }
static bool acpi_has_iommu(void)
{
bool ambiguous;
Object *intel_iommu;
intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
&ambiguous);
return intel_iommu && !ambiguous;
}
static static
void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
{ {
@ -1584,6 +1619,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
acpi_add_table(table_offsets, tables->table_data); acpi_add_table(table_offsets, tables->table_data);
build_mcfg_q35(tables->table_data, tables->linker, &mcfg); build_mcfg_q35(tables->table_data, tables->linker, &mcfg);
} }
if (acpi_has_iommu()) {
acpi_add_table(table_offsets, tables->table_data);
build_dmar_q35(tables->table_data, tables->linker);
}
/* Add tables supplied by user (if any) */ /* Add tables supplied by user (if any) */
for (u = acpi_table_first(); u; u = acpi_table_next(u)) { for (u = acpi_table_first(); u; u = acpi_table_next(u)) {

View file

@ -325,4 +325,44 @@ struct Acpi20Tcpa {
} QEMU_PACKED; } QEMU_PACKED;
typedef struct Acpi20Tcpa Acpi20Tcpa; typedef struct Acpi20Tcpa Acpi20Tcpa;
/* DMAR - DMA Remapping table r2.2 */
struct AcpiTableDmar {
ACPI_TABLE_HEADER_DEF
uint8_t host_address_width; /* Maximum DMA physical addressability */
uint8_t flags;
uint8_t reserved[10];
} QEMU_PACKED;
typedef struct AcpiTableDmar AcpiTableDmar;
/* Masks for Flags field above */
#define ACPI_DMAR_INTR_REMAP 1
#define ACPI_DMAR_X2APIC_OPT_OUT (1 << 1)
/* Values for sub-structure type for DMAR */
enum {
ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */
ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */
ACPI_DMAR_TYPE_ATSR = 2, /* ATSR */
ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */
ACPI_DMAR_TYPE_ANDD = 4, /* ANDD */
ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */
};
/*
* Sub-structures for DMAR
*/
/* Type 0: Hardware Unit Definition */
struct AcpiDmarHardwareUnit {
uint16_t type;
uint16_t length;
uint8_t flags;
uint8_t reserved;
uint16_t pci_segment; /* The PCI Segment associated with this unit */
uint64_t address; /* Base address of remapping hardware register-set */
} QEMU_PACKED;
typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
/* Masks for Flags field above */
#define ACPI_DMAR_INCLUDE_PCI_ALL 1
#endif #endif

1963
hw/i386/intel_iommu.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,389 @@
/*
* QEMU emulation of an Intel IOMMU (VT-d)
* (DMA Remapping device)
*
* Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
* Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Lots of defines copied from kernel/include/linux/intel-iommu.h:
* Copyright (C) 2006-2008 Intel Corporation
* Author: Ashok Raj <ashok.raj@intel.com>
* Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*
*/
#ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
#define HW_I386_INTEL_IOMMU_INTERNAL_H
#include "hw/i386/intel_iommu.h"
/*
* Intel IOMMU register specification
*/
#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
#define DMAR_CAP_REG_HI 0xc /* High 32-bit of DMAR_CAP_REG */
#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
#define DMAR_ECAP_REG_HI 0X14
#define DMAR_GCMD_REG 0x18 /* Global command */
#define DMAR_GSTS_REG 0x1c /* Global status */
#define DMAR_RTADDR_REG 0x20 /* Root entry table */
#define DMAR_RTADDR_REG_HI 0X24
#define DMAR_CCMD_REG 0x28 /* Context command */
#define DMAR_CCMD_REG_HI 0x2c
#define DMAR_FSTS_REG 0x34 /* Fault status */
#define DMAR_FECTL_REG 0x38 /* Fault control */
#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data */
#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr */
#define DMAR_FEUADDR_REG 0x44 /* Upper address */
#define DMAR_AFLOG_REG 0x58 /* Advanced fault control */
#define DMAR_AFLOG_REG_HI 0X5c
#define DMAR_PMEN_REG 0x64 /* Enable protected memory region */
#define DMAR_PLMBASE_REG 0x68 /* PMRR low addr */
#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
#define DMAR_PHMBASE_REG 0x70 /* PMRR high base addr */
#define DMAR_PHMBASE_REG_HI 0X74
#define DMAR_PHMLIMIT_REG 0x78 /* PMRR high limit */
#define DMAR_PHMLIMIT_REG_HI 0x7c
#define DMAR_IQH_REG 0x80 /* Invalidation queue head */
#define DMAR_IQH_REG_HI 0X84
#define DMAR_IQT_REG 0x88 /* Invalidation queue tail */
#define DMAR_IQT_REG_HI 0X8c
#define DMAR_IQA_REG 0x90 /* Invalidation queue addr */
#define DMAR_IQA_REG_HI 0x94
#define DMAR_ICS_REG 0x9c /* Invalidation complete status */
#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr */
#define DMAR_IRTA_REG_HI 0xbc
#define DMAR_IECTL_REG 0xa0 /* Invalidation event control */
#define DMAR_IEDATA_REG 0xa4 /* Invalidation event data */
#define DMAR_IEADDR_REG 0xa8 /* Invalidation event address */
#define DMAR_IEUADDR_REG 0xac /* Invalidation event address */
#define DMAR_PQH_REG 0xc0 /* Page request queue head */
#define DMAR_PQH_REG_HI 0xc4
#define DMAR_PQT_REG 0xc8 /* Page request queue tail*/
#define DMAR_PQT_REG_HI 0xcc
#define DMAR_PQA_REG 0xd0 /* Page request queue address */
#define DMAR_PQA_REG_HI 0xd4
#define DMAR_PRS_REG 0xdc /* Page request status */
#define DMAR_PECTL_REG 0xe0 /* Page request event control */
#define DMAR_PEDATA_REG 0xe4 /* Page request event data */
#define DMAR_PEADDR_REG 0xe8 /* Page request event address */
#define DMAR_PEUADDR_REG 0xec /* Page event upper address */
#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability */
#define DMAR_MTRRCAP_REG_HI 0x104
#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type */
#define DMAR_MTRRDEF_REG_HI 0x10c
/* IOTLB registers */
#define DMAR_IOTLB_REG_OFFSET 0xf0 /* Offset to the IOTLB registers */
#define DMAR_IVA_REG DMAR_IOTLB_REG_OFFSET /* Invalidate address */
#define DMAR_IVA_REG_HI (DMAR_IVA_REG + 4)
/* IOTLB invalidate register */
#define DMAR_IOTLB_REG (DMAR_IOTLB_REG_OFFSET + 0x8)
#define DMAR_IOTLB_REG_HI (DMAR_IOTLB_REG + 4)
/* FRCD */
#define DMAR_FRCD_REG_OFFSET 0x220 /* Offset to the fault recording regs */
/* NOTICE: If you change the DMAR_FRCD_REG_NR, please remember to change the
* DMAR_REG_SIZE in include/hw/i386/intel_iommu.h.
* #define DMAR_REG_SIZE (DMAR_FRCD_REG_OFFSET + 16 * DMAR_FRCD_REG_NR)
*/
#define DMAR_FRCD_REG_NR 1ULL /* Num of fault recording regs */
#define DMAR_FRCD_REG_0_0 0x220 /* The 0th fault recording regs */
#define DMAR_FRCD_REG_0_1 0x224
#define DMAR_FRCD_REG_0_2 0x228
#define DMAR_FRCD_REG_0_3 0x22c
/* Interrupt Address Range */
#define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL
#define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL
/* The shift of source_id in the key of IOTLB hash table */
#define VTD_IOTLB_SID_SHIFT 36
#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
/* IOTLB_REG */
#define VTD_TLB_GLOBAL_FLUSH (1ULL << 60) /* Global invalidation */
#define VTD_TLB_DSI_FLUSH (2ULL << 60) /* Domain-selective */
#define VTD_TLB_PSI_FLUSH (3ULL << 60) /* Page-selective */
#define VTD_TLB_FLUSH_GRANU_MASK (3ULL << 60)
#define VTD_TLB_GLOBAL_FLUSH_A (1ULL << 57)
#define VTD_TLB_DSI_FLUSH_A (2ULL << 57)
#define VTD_TLB_PSI_FLUSH_A (3ULL << 57)
#define VTD_TLB_FLUSH_GRANU_MASK_A (3ULL << 57)
#define VTD_TLB_IVT (1ULL << 63)
#define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK)
/* IVA_REG */
#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1))
#define VTD_IVA_AM(val) ((val) & 0x3fULL)
/* GCMD_REG */
#define VTD_GCMD_TE (1UL << 31)
#define VTD_GCMD_SRTP (1UL << 30)
#define VTD_GCMD_SFL (1UL << 29)
#define VTD_GCMD_EAFL (1UL << 28)
#define VTD_GCMD_WBF (1UL << 27)
#define VTD_GCMD_QIE (1UL << 26)
#define VTD_GCMD_IRE (1UL << 25)
#define VTD_GCMD_SIRTP (1UL << 24)
#define VTD_GCMD_CFI (1UL << 23)
/* GSTS_REG */
#define VTD_GSTS_TES (1UL << 31)
#define VTD_GSTS_RTPS (1UL << 30)
#define VTD_GSTS_FLS (1UL << 29)
#define VTD_GSTS_AFLS (1UL << 28)
#define VTD_GSTS_WBFS (1UL << 27)
#define VTD_GSTS_QIES (1UL << 26)
#define VTD_GSTS_IRES (1UL << 25)
#define VTD_GSTS_IRTPS (1UL << 24)
#define VTD_GSTS_CFIS (1UL << 23)
/* CCMD_REG */
#define VTD_CCMD_ICC (1ULL << 63)
#define VTD_CCMD_GLOBAL_INVL (1ULL << 61)
#define VTD_CCMD_DOMAIN_INVL (2ULL << 61)
#define VTD_CCMD_DEVICE_INVL (3ULL << 61)
#define VTD_CCMD_CIRG_MASK (3ULL << 61)
#define VTD_CCMD_GLOBAL_INVL_A (1ULL << 59)
#define VTD_CCMD_DOMAIN_INVL_A (2ULL << 59)
#define VTD_CCMD_DEVICE_INVL_A (3ULL << 59)
#define VTD_CCMD_CAIG_MASK (3ULL << 59)
#define VTD_CCMD_DID(val) ((val) & VTD_DOMAIN_ID_MASK)
#define VTD_CCMD_SID(val) (((val) >> 16) & 0xffffULL)
#define VTD_CCMD_FM(val) (((val) >> 32) & 3ULL)
/* RTADDR_REG */
#define VTD_RTADDR_RTT (1ULL << 11)
#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
/* ECAP_REG */
/* (offset >> 4) << 8 */
#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4)
#define VTD_ECAP_QI (1ULL << 1)
/* CAP_REG */
/* (offset >> 4) << 24 */
#define VTD_CAP_FRO (DMAR_FRCD_REG_OFFSET << 20)
#define VTD_CAP_NFR ((DMAR_FRCD_REG_NR - 1) << 40)
#define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */
#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
#define VTD_MGAW 39 /* Maximum Guest Address Width */
#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16)
#define VTD_MAMV 9ULL
#define VTD_CAP_MAMV (VTD_MAMV << 48)
#define VTD_CAP_PSI (1ULL << 39)
/* Supported Adjusted Guest Address Widths */
#define VTD_CAP_SAGAW_SHIFT 8
#define VTD_CAP_SAGAW_MASK (0x1fULL << VTD_CAP_SAGAW_SHIFT)
/* 39-bit AGAW, 3-level page-table */
#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT)
/* 48-bit AGAW, 4-level page-table */
#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT)
#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit
/* IQT_REG */
#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL)
/* IQA_REG */
#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL)
#define VTD_IQA_QS 0x7ULL
/* IQH_REG */
#define VTD_IQH_QH_SHIFT 4
#define VTD_IQH_QH_MASK 0x7fff0ULL
/* ICS_REG */
#define VTD_ICS_IWC 1UL
/* IECTL_REG */
#define VTD_IECTL_IM (1UL << 31)
#define VTD_IECTL_IP (1UL << 30)
/* FSTS_REG */
#define VTD_FSTS_FRI_MASK 0xff00UL
#define VTD_FSTS_FRI(val) ((((uint32_t)(val)) << 8) & VTD_FSTS_FRI_MASK)
#define VTD_FSTS_IQE (1UL << 4)
#define VTD_FSTS_PPF (1UL << 1)
#define VTD_FSTS_PFO 1UL
/* FECTL_REG */
#define VTD_FECTL_IM (1UL << 31)
#define VTD_FECTL_IP (1UL << 30)
/* Fault Recording Register */
/* For the high 64-bit of 128-bit */
#define VTD_FRCD_F (1ULL << 63)
#define VTD_FRCD_T (1ULL << 62)
#define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32)
#define VTD_FRCD_SID_MASK 0xffffULL
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL))
/* DMA Remapping Fault Conditions */
typedef enum VTDFaultReason {
VTD_FR_RESERVED = 0, /* Reserved for Advanced Fault logging */
VTD_FR_ROOT_ENTRY_P = 1, /* The Present(P) field of root-entry is 0 */
VTD_FR_CONTEXT_ENTRY_P, /* The Present(P) field of context-entry is 0 */
VTD_FR_CONTEXT_ENTRY_INV, /* Invalid programming of a context-entry */
VTD_FR_ADDR_BEYOND_MGAW, /* Input-address above (2^x-1) */
VTD_FR_WRITE, /* No write permission */
VTD_FR_READ, /* No read permission */
/* Fail to access a second-level paging entry (not SL_PML4E) */
VTD_FR_PAGING_ENTRY_INV,
VTD_FR_ROOT_TABLE_INV, /* Fail to access a root-entry */
VTD_FR_CONTEXT_TABLE_INV, /* Fail to access a context-entry */
/* Non-zero reserved field in a present root-entry */
VTD_FR_ROOT_ENTRY_RSVD,
/* Non-zero reserved field in a present context-entry */
VTD_FR_CONTEXT_ENTRY_RSVD,
/* Non-zero reserved field in a second-level paging entry with at lease one
* Read(R) and Write(W) or Execute(E) field is Set.
*/
VTD_FR_PAGING_ENTRY_RSVD,
/* Translation request or translated request explicitly blocked dut to the
* programming of the Translation Type (T) field in the present
* context-entry.
*/
VTD_FR_CONTEXT_ENTRY_TT,
/* This is not a normal fault reason. We use this to indicate some faults
* that are not referenced by the VT-d specification.
* Fault event with such reason should not be recorded.
*/
VTD_FR_RESERVED_ERR,
VTD_FR_MAX, /* Guard */
} VTDFaultReason;
#define VTD_CONTEXT_CACHE_GEN_MAX 0xffffffffUL
/* Queued Invalidation Descriptor */
struct VTDInvDesc {
uint64_t lo;
uint64_t hi;
};
typedef struct VTDInvDesc VTDInvDesc;
/* Masks for struct VTDInvDesc */
#define VTD_INV_DESC_TYPE 0xf
#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */
#define VTD_INV_DESC_IOTLB 0x2
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
/* Masks for Invalidation Wait Descriptor*/
#define VTD_INV_DESC_WAIT_SW (1ULL << 5)
#define VTD_INV_DESC_WAIT_IF (1ULL << 4)
#define VTD_INV_DESC_WAIT_FN (1ULL << 6)
#define VTD_INV_DESC_WAIT_DATA_SHIFT 32
#define VTD_INV_DESC_WAIT_RSVD_LO 0Xffffff80ULL
#define VTD_INV_DESC_WAIT_RSVD_HI 3ULL
/* Masks for Context-cache Invalidation Descriptor */
#define VTD_INV_DESC_CC_G (3ULL << 4)
#define VTD_INV_DESC_CC_GLOBAL (1ULL << 4)
#define VTD_INV_DESC_CC_DOMAIN (2ULL << 4)
#define VTD_INV_DESC_CC_DEVICE (3ULL << 4)
#define VTD_INV_DESC_CC_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
#define VTD_INV_DESC_CC_SID(val) (((val) >> 32) & 0xffffUL)
#define VTD_INV_DESC_CC_FM(val) (((val) >> 48) & 3UL)
#define VTD_INV_DESC_CC_RSVD 0xfffc00000000ffc0ULL
/* Masks for IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_IOTLB_G (3ULL << 4)
#define VTD_INV_DESC_IOTLB_GLOBAL (1ULL << 4)
#define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4)
#define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4)
#define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \
((1ULL << VTD_MGAW) - 1))
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
uint64_t gfn;
uint8_t mask;
};
typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
/* Pagesize of VTD paging structures, including root and context tables */
#define VTD_PAGE_SHIFT 12
#define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT)
#define VTD_PAGE_SHIFT_4K 12
#define VTD_PAGE_MASK_4K (~((1ULL << VTD_PAGE_SHIFT_4K) - 1))
#define VTD_PAGE_SHIFT_2M 21
#define VTD_PAGE_MASK_2M (~((1ULL << VTD_PAGE_SHIFT_2M) - 1))
#define VTD_PAGE_SHIFT_1G 30
#define VTD_PAGE_MASK_1G (~((1ULL << VTD_PAGE_SHIFT_1G) - 1))
struct VTDRootEntry {
uint64_t val;
uint64_t rsvd;
};
typedef struct VTDRootEntry VTDRootEntry;
/* Masks for struct VTDRootEntry */
#define VTD_ROOT_ENTRY_P 1ULL
#define VTD_ROOT_ENTRY_CTP (~0xfffULL)
#define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry))
#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK)
/* Masks for struct VTDContextEntry */
/* lo */
#define VTD_CONTEXT_ENTRY_P (1ULL << 0)
#define VTD_CONTEXT_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
#define VTD_CONTEXT_ENTRY_TT (3ULL << 2) /* Translation Type */
#define VTD_CONTEXT_TT_MULTI_LEVEL 0
#define VTD_CONTEXT_TT_DEV_IOTLB 1
#define VTD_CONTEXT_TT_PASS_THROUGH 2
/* Second Level Page Translation Pointer*/
#define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL)
#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK)
/* hi */
#define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */
#define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK)
#define VTD_CONTEXT_ENTRY_RSVD_HI 0xffffffffff000080ULL
#define VTD_CONTEXT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDContextEntry))
/* Paging Structure common */
#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7)
/* Bits to decide the offset for each level */
#define VTD_SL_LEVEL_BITS 9
/* Second Level Paging Structure */
#define VTD_SL_PML4_LEVEL 4
#define VTD_SL_PDP_LEVEL 3
#define VTD_SL_PD_LEVEL 2
#define VTD_SL_PT_LEVEL 1
#define VTD_SL_PT_ENTRY_NR 512
/* Masks for Second Level Paging Entry */
#define VTD_SL_RW_MASK 3ULL
#define VTD_SL_R 1ULL
#define VTD_SL_W (1ULL << 1)
#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK)
#define VTD_SL_IGN_COM 0xbff0000000000000ULL
#endif

View file

@ -188,20 +188,19 @@ bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
return vhost_dev_query(&net->dev, dev); return vhost_dev_query(&net->dev, dev);
} }
static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index)
{
net->dev.vq_index = vq_index;
}
static int vhost_net_start_one(struct vhost_net *net, static int vhost_net_start_one(struct vhost_net *net,
VirtIODevice *dev, VirtIODevice *dev)
int vq_index)
{ {
struct vhost_vring_file file = { }; struct vhost_vring_file file = { };
int r; int r;
if (net->dev.started) {
return 0;
}
net->dev.nvqs = 2; net->dev.nvqs = 2;
net->dev.vqs = net->vqs; net->dev.vqs = net->vqs;
net->dev.vq_index = vq_index;
r = vhost_dev_enable_notifiers(&net->dev, dev); r = vhost_dev_enable_notifiers(&net->dev, dev);
if (r < 0) { if (r < 0) {
@ -256,10 +255,6 @@ static void vhost_net_stop_one(struct vhost_net *net,
{ {
struct vhost_vring_file file = { .fd = -1 }; struct vhost_vring_file file = { .fd = -1 };
if (!net->dev.started) {
return;
}
if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) { if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
const VhostOps *vhost_ops = net->dev.vhost_ops; const VhostOps *vhost_ops = net->dev.vhost_ops;
@ -309,11 +304,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
} }
for (i = 0; i < total_queues; i++) { for (i = 0; i < total_queues; i++) {
r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2); vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2);
if (r < 0) {
goto err;
}
} }
r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
@ -322,6 +313,14 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
goto err; goto err;
} }
for (i = 0; i < total_queues; i++) {
r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
if (r < 0) {
goto err;
}
}
return 0; return 0;
err: err:
@ -339,16 +338,16 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
int i, r; int i, r;
for (i = 0; i < total_queues; i++) {
vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
}
r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
if (r < 0) { if (r < 0) {
fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
fflush(stderr); fflush(stderr);
} }
assert(r >= 0); assert(r >= 0);
for (i = 0; i < total_queues; i++) {
vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
}
} }
void vhost_net_cleanup(struct vhost_net *net) void vhost_net_cleanup(struct vhost_net *net)

View file

@ -1224,7 +1224,12 @@ static void virtio_net_tx_timer(void *opaque)
VirtIONetQueue *q = opaque; VirtIONetQueue *q = opaque;
VirtIONet *n = q->n; VirtIONet *n = q->n;
VirtIODevice *vdev = VIRTIO_DEVICE(n); VirtIODevice *vdev = VIRTIO_DEVICE(n);
assert(vdev->vm_running); /* This happens when device was stopped but BH wasn't. */
if (!vdev->vm_running) {
/* Make sure tx waiting is set, so we'll run when restarted. */
assert(q->tx_waiting);
return;
}
q->tx_waiting = 0; q->tx_waiting = 0;
@ -1244,7 +1249,12 @@ static void virtio_net_tx_bh(void *opaque)
VirtIODevice *vdev = VIRTIO_DEVICE(n); VirtIODevice *vdev = VIRTIO_DEVICE(n);
int32_t ret; int32_t ret;
assert(vdev->vm_running); /* This happens when device was stopped but BH wasn't. */
if (!vdev->vm_running) {
/* Make sure tx waiting is set, so we'll run when restarted. */
assert(q->tx_waiting);
return;
}
q->tx_waiting = 0; q->tx_waiting = 0;

View file

@ -3,8 +3,4 @@
#include "hw/pci/pcie_port.h" #include "hw/pci/pcie_port.h"
PCIESlot *ioh3420_init(PCIBus *bus, int devfn, bool multifunction,
const char *bus_name, pci_map_irq_fn map_irq,
uint8_t port, uint8_t chassis, uint16_t slot);
#endif /* QEMU_IOH3420_H */ #endif /* QEMU_IOH3420_H */

View file

@ -204,7 +204,8 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &is->iommu_as; return &is->iommu_as;
} }
static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr) static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
bool is_write)
{ {
IOMMUState *is = container_of(iommu, IOMMUState, iommu); IOMMUState *is = container_of(iommu, IOMMUState, iommu);
hwaddr baseaddr, offset; hwaddr baseaddr, offset;

View file

@ -347,6 +347,49 @@ static void mch_reset(DeviceState *qdev)
mch_update(mch); mch_update(mch);
} }
static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
IntelIOMMUState *s = opaque;
VTDAddressSpace **pvtd_as;
int bus_num = pci_bus_num(bus);
assert(0 <= bus_num && bus_num <= VTD_PCI_BUS_MAX);
assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
pvtd_as = s->address_spaces[bus_num];
if (!pvtd_as) {
/* No corresponding free() */
pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
s->address_spaces[bus_num] = pvtd_as;
}
if (!pvtd_as[devfn]) {
pvtd_as[devfn] = g_malloc0(sizeof(VTDAddressSpace));
pvtd_as[devfn]->bus_num = (uint8_t)bus_num;
pvtd_as[devfn]->devfn = (uint8_t)devfn;
pvtd_as[devfn]->iommu_state = s;
pvtd_as[devfn]->context_cache_entry.context_cache_gen = 0;
memory_region_init_iommu(&pvtd_as[devfn]->iommu, OBJECT(s),
&s->iommu_ops, "intel_iommu", UINT64_MAX);
address_space_init(&pvtd_as[devfn]->as,
&pvtd_as[devfn]->iommu, "intel_iommu");
}
return &pvtd_as[devfn]->as;
}
static void mch_init_dmar(MCHPCIState *mch)
{
PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
mch->iommu = INTEL_IOMMU_DEVICE(qdev_create(NULL, TYPE_INTEL_IOMMU_DEVICE));
object_property_add_child(OBJECT(mch), "intel-iommu",
OBJECT(mch->iommu), NULL);
qdev_init_nofail(DEVICE(mch->iommu));
sysbus_mmio_map(SYS_BUS_DEVICE(mch->iommu), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
}
static int mch_init(PCIDevice *d) static int mch_init(PCIDevice *d)
{ {
int i; int i;
@ -363,12 +406,17 @@ static int mch_init(PCIDevice *d)
memory_region_add_subregion_overlap(mch->system_memory, 0xa0000, memory_region_add_subregion_overlap(mch->system_memory, 0xa0000,
&mch->smram_region, 1); &mch->smram_region, 1);
memory_region_set_enabled(&mch->smram_region, false); memory_region_set_enabled(&mch->smram_region, false);
init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, mch->pci_address_space, init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
&mch->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE); mch->pci_address_space, &mch->pam_regions[0],
PAM_BIOS_BASE, PAM_BIOS_SIZE);
for (i = 0; i < 12; ++i) { for (i = 0; i < 12; ++i) {
init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, mch->pci_address_space, init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
&mch->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, mch->pci_address_space, &mch->pam_regions[i+1],
PAM_EXPAN_SIZE); PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
}
/* Intel IOMMU (VT-d) */
if (qemu_opt_get_bool(qemu_get_machine_opts(), "iommu", false)) {
mch_init_dmar(mch);
} }
return 0; return 0;
} }

View file

@ -1146,9 +1146,10 @@ uint32_t pci_default_read_config(PCIDevice *d,
return le32_to_cpu(val); return le32_to_cpu(val);
} }
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{ {
int i, was_irq_disabled = pci_irq_disabled(d); int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
for (i = 0; i < l; val >>= 8, ++i) { for (i = 0; i < l; val >>= 8, ++i) {
uint8_t wmask = d->wmask[addr + i]; uint8_t wmask = d->wmask[addr + i];
@ -1170,8 +1171,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
& PCI_COMMAND_MASTER); & PCI_COMMAND_MASTER);
} }
msi_write_config(d, addr, val, l); msi_write_config(d, addr, val_in, l);
msix_write_config(d, addr, val, l); msix_write_config(d, addr, val_in, l);
} }
/***********************************************************/ /***********************************************************/

View file

@ -59,7 +59,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
return NULL; return NULL;
} }
static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
bool is_write)
{ {
sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
uint64_t tce; uint64_t tce;

View file

@ -976,7 +976,6 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n) bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
{ {
struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index; struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
assert(hdev->started);
assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs); assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
return event_notifier_test_and_clear(&vq->masked_notifier); return event_notifier_test_and_clear(&vq->masked_notifier);
} }
@ -988,7 +987,6 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
struct VirtQueue *vvq = virtio_get_queue(vdev, n); struct VirtQueue *vvq = virtio_get_queue(vdev, n);
int r, index = n - hdev->vq_index; int r, index = n - hdev->vq_index;
assert(hdev->started);
assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs); assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
struct vhost_vring_file file = { struct vhost_vring_file file = {

View file

@ -129,7 +129,7 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
struct MemoryRegionIOMMUOps { struct MemoryRegionIOMMUOps {
/* Return a TLB entry that contains a given address. */ /* Return a TLB entry that contains a given address. */
IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr); IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
}; };
typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct CoalescedMemoryRange CoalescedMemoryRange;

View file

@ -123,6 +123,7 @@ struct MachineState {
bool mem_merge; bool mem_merge;
bool usb; bool usb;
char *firmware; char *firmware;
bool iommu;
ram_addr_t ram_size; ram_addr_t ram_size;
ram_addr_t maxram_size; ram_addr_t maxram_size;

View file

@ -0,0 +1,120 @@
/*
 * QEMU emulation of an Intel IOMMU (VT-d)
 * (DMA Remapping device)
 *
 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef INTEL_IOMMU_H
#define INTEL_IOMMU_H

#include "hw/qdev.h"
#include "sysemu/dma.h"

#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
#define INTEL_IOMMU_DEVICE(obj) \
     OBJECT_CHECK(IntelIOMMUState, (obj), TYPE_INTEL_IOMMU_DEVICE)

/* DMAR Hardware Unit Definition address (IOMMU unit) */
#define Q35_HOST_BRIDGE_IOMMU_ADDR  0xfed90000ULL

#define VTD_PCI_BUS_MAX             256
#define VTD_PCI_SLOT_MAX            32
#define VTD_PCI_FUNC_MAX            8
#define VTD_PCI_DEVFN_MAX           256
#define VTD_PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
#define VTD_PCI_FUNC(devfn)         ((devfn) & 0x07)

/* A source-id (SID) is bus[15:8]:devfn[7:0].  Extract the two fields.
 * Bug fix: the bus macro previously used logical '&&' instead of bitwise
 * '&', which collapsed every non-zero bus number to 1.
 */
#define VTD_SID_TO_BUS(sid)         (((sid) >> 8) & 0xff)
#define VTD_SID_TO_DEVFN(sid)       ((sid) & 0xff)

#define DMAR_REG_SIZE               0x230

/* Guest-physical address width emulated by this VT-d implementation */
#define VTD_HOST_ADDRESS_WIDTH      39
#define VTD_HAW_MASK                ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1)

typedef struct VTDContextEntry VTDContextEntry;
typedef struct VTDContextCacheEntry VTDContextCacheEntry;
typedef struct IntelIOMMUState IntelIOMMUState;
typedef struct VTDAddressSpace VTDAddressSpace;
typedef struct VTDIOTLBEntry VTDIOTLBEntry;

/* Context-Entry: 128-bit entry stored as two 64-bit halves */
struct VTDContextEntry {
    uint64_t lo;
    uint64_t hi;
};

/* Cached copy of a context-entry, tagged with a generation number */
struct VTDContextCacheEntry {
    /* The cache entry is obsolete if
     * context_cache_gen!=IntelIOMMUState.context_cache_gen
     */
    uint32_t context_cache_gen;
    struct VTDContextEntry context_entry;
};

/* Per-device DMA address space routed through the IOMMU */
struct VTDAddressSpace {
    uint8_t bus_num;
    uint8_t devfn;
    AddressSpace as;
    MemoryRegion iommu;
    IntelIOMMUState *iommu_state;
    VTDContextCacheEntry context_cache_entry;
};

/* One IOTLB entry: cached second-level translation for (domain, gfn) */
struct VTDIOTLBEntry {
    uint64_t gfn;
    uint16_t domain_id;
    uint64_t slpte;
    bool read_flags;
    bool write_flags;
};

/* The iommu (DMAR) device state struct */
struct IntelIOMMUState {
    SysBusDevice busdev;
    MemoryRegion csrmem;
    uint8_t csr[DMAR_REG_SIZE];     /* register values */
    uint8_t wmask[DMAR_REG_SIZE];   /* R/W bytes */
    uint8_t w1cmask[DMAR_REG_SIZE]; /* RW1C(Write 1 to Clear) bytes */
    uint8_t womask[DMAR_REG_SIZE];  /* WO (write only - read returns 0) */
    uint32_t version;

    dma_addr_t root;                /* Current root table pointer */
    bool root_extended;             /* Type of root table (extended or not) */
    bool dmar_enabled;              /* Set if DMA remapping is enabled */

    uint16_t iq_head;               /* Current invalidation queue head */
    uint16_t iq_tail;               /* Current invalidation queue tail */
    dma_addr_t iq;                  /* Current invalidation queue pointer */
    uint16_t iq_size;               /* IQ Size in number of entries */
    bool qi_enabled;                /* Set if the QI is enabled */
    uint8_t iq_last_desc_type;      /* The type of last completed descriptor */

    /* The index of the Fault Recording Register to be used next.
     * Wraps around from N-1 to 0, where N is the number of FRCD_REG.
     */
    uint16_t next_frcd_reg;

    uint64_t cap;                   /* The value of capability reg */
    uint64_t ecap;                  /* The value of extended capability reg */

    uint32_t context_cache_gen;     /* Should be in [1,MAX] */
    GHashTable *iotlb;              /* IOTLB */

    MemoryRegionIOMMUOps iommu_ops;
    /* Lazily-populated per-(bus, devfn) address spaces */
    VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX];
};

#endif

View file

@ -33,6 +33,7 @@
#include "hw/acpi/acpi.h" #include "hw/acpi/acpi.h"
#include "hw/acpi/ich9.h" #include "hw/acpi/ich9.h"
#include "hw/pci-host/pam.h" #include "hw/pci-host/pam.h"
#include "hw/i386/intel_iommu.h"
#define TYPE_Q35_HOST_DEVICE "q35-pcihost" #define TYPE_Q35_HOST_DEVICE "q35-pcihost"
#define Q35_HOST_DEVICE(obj) \ #define Q35_HOST_DEVICE(obj) \
@ -60,6 +61,7 @@ typedef struct MCHPCIState {
uint64_t pci_hole64_size; uint64_t pci_hole64_size;
PcGuestInfo *guest_info; PcGuestInfo *guest_info;
uint32_t short_root_bus; uint32_t short_root_bus;
IntelIOMMUState *iommu;
} MCHPCIState; } MCHPCIState;
typedef struct Q35PCIHost { typedef struct Q35PCIHost {

View file

@ -35,7 +35,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" kernel_irqchip=on|off controls accelerated irqchip support\n" " kernel_irqchip=on|off controls accelerated irqchip support\n"
" kvm_shadow_mem=size of KVM shadow MMU\n" " kvm_shadow_mem=size of KVM shadow MMU\n"
" dump-guest-core=on|off include guest memory in a core dump (default=on)\n" " dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
" mem-merge=on|off controls memory merge support (default: on)\n", " mem-merge=on|off controls memory merge support (default: on)\n"
" iommu=on|off controls emulated Intel IOMMU (VT-d) support (default=off)\n",
QEMU_ARCH_ALL) QEMU_ARCH_ALL)
STEXI STEXI
@item -machine [type=]@var{name}[,prop=@var{value}[,...]] @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
@ -58,6 +59,8 @@ Include guest memory in a core dump. The default is on.
Enables or disables memory merge support. This feature, when supported by Enables or disables memory merge support. This feature, when supported by
the host, de-duplicates identical memory pages among VMs instances the host, de-duplicates identical memory pages among VMs instances
(enabled by default). (enabled by default).
@item iommu=on|off
Enables or disables emulated Intel IOMMU (VT-d) support. The default is off.
@end table @end table
ETEXI ETEXI

4
vl.c
View file

@ -388,6 +388,10 @@ static QemuOptsList qemu_machine_opts = {
.name = PC_MACHINE_MAX_RAM_BELOW_4G, .name = PC_MACHINE_MAX_RAM_BELOW_4G,
.type = QEMU_OPT_SIZE, .type = QEMU_OPT_SIZE,
.help = "maximum ram below the 4G boundary (32bit boundary)", .help = "maximum ram below the 4G boundary (32bit boundary)",
},{
.name = "iommu",
.type = QEMU_OPT_BOOL,
.help = "Set on/off to enable/disable Intel IOMMU (VT-d)",
}, },
{ /* End of list */ } { /* End of list */ }
}, },