Machine queue for 2.10

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABCAAGBQJY+QGLAAoJECgHk2+YTcWm9U0QAKaZOefXp73ZKJOtYuS+oF5w
 L57dGuULzpxzOlQXug2aHNBRaaBPcNqGI/LvId6eWQBDHEqSWCmpfq5DXsM3qNCq
 PCDbveo1gn582MGPgRKYHeN8wqcyt32mpd6TgjBPIhmcLwCR9meCtRvFMDykFuLK
 +2typZrWLUqsMh0tRz+mVnbkIeBIN4um85Bqx9gknUHerc+jr2boIpzpvHHNlx4q
 37JcIgXFnhz3sjMwuRyK/XihbeMiC1zK6XYxZLbn8JEL9tVBSAiv0PUguJwyjLVP
 zftENpiXf4fqURZ/cia43Oy+MQDJEKhiw+oIikGDd1NqAmgCLHKYNQ6lZUL6x+ja
 xuyh4sLZgA0o8pPJBuiPseFe3ZcOUSw+TAoTnOorOfYrc65meFJ2byc7STVbh/G9
 hGTBfz19mvAtgMk5JsaJsB5MB12vCDizoTxgF35yOq4+jdOQUKGRLIYHt80Q37jl
 /5JbWC0xVP1BsCYhsnpleA7V8d7/V1AUaxiVZYeOZiFV/ALzEFxlyQYmaDhjjQEe
 FIdFea1eGrTX7P93uGoFDokIkXn9OxD+PvJxrCy076g+noFCERaX4y3MePjOlpm1
 gj5A+JTVPvDgB0H53+sDT6i6cqLGqNJPPuIPtX5vQEGFkKIVHNj67NRZlxgWdFj9
 lgE7/vHttYI2zUfgFZv+
 =Em1p
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/ehabkost/tags/machine-pull-request' into staging

Machine queue for 2.10

# gpg: Signature made Thu 20 Apr 2017 19:44:27 BST
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/machine-pull-request:
  qdev: Constify local variable returned by blk_bs
  qdev: Constify value passed to qdev_prop_set_macaddr
  hostmem: use host_memory_backend_mr_inited() where proper
  hostmem: introduce host_memory_backend_mr_inited()
  hw/core/null-machine: Print error message when using the -kernel parameter
  qdev: Make "hotplugged" property read-only
  intel_iommu: enable remote IOTLB
  intel_iommu: allow dynamic switch of IOMMU region
  intel_iommu: provide its own replay() callback
  intel_iommu: use the correct memory region for device IOTLB notification
  memory: add MemoryRegionIOMMUOps.replay() callback
  memory: introduce memory_region_notify_one()
  memory: provide iommu_replay_all()
  memory: provide IOMMU_NOTIFIER_FOREACH macro
  memory: add section range info for IOMMU notifier

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2017-04-21 10:23:56 +01:00
commit 7cd37925a1
16 changed files with 584 additions and 54 deletions

View file

@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
#ifndef CONFIG_LINUX
error_setg(errp, "-mem-path not supported on this host");
#else
if (!memory_region_size(&backend->mr)) {
if (!host_memory_backend_mr_inited(backend)) {
gchar *path;
backend->force_prealloc = mem_prealloc;
path = object_get_canonical_path(OBJECT(backend));
@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp)
HostMemoryBackend *backend = MEMORY_BACKEND(o);
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
if (memory_region_size(&backend->mr)) {
if (host_memory_backend_mr_inited(backend)) {
error_setg(errp, "cannot change property value");
return;
}
@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
HostMemoryBackend *backend = MEMORY_BACKEND(o);
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
if (memory_region_size(&backend->mr)) {
if (host_memory_backend_mr_inited(backend)) {
error_setg(errp, "cannot change property value");
return;
}

View file

@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
Error *local_err = NULL;
uint64_t value;
if (memory_region_size(&backend->mr)) {
if (host_memory_backend_mr_inited(backend)) {
error_setg(&local_err, "cannot change property value");
goto out;
}
@ -146,7 +146,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
if (!memory_region_size(&backend->mr)) {
if (!host_memory_backend_mr_inited(backend)) {
backend->merge = value;
return;
}
@ -172,7 +172,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
if (!memory_region_size(&backend->mr)) {
if (!host_memory_backend_mr_inited(backend)) {
backend->dump = value;
return;
}
@ -208,7 +208,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
}
}
if (!memory_region_size(&backend->mr)) {
if (!host_memory_backend_mr_inited(backend)) {
backend->prealloc = value;
return;
}
@ -237,10 +237,19 @@ static void host_memory_backend_init(Object *obj)
backend->prealloc = mem_prealloc;
}
bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
{
/*
* NOTE: We forbid zero-length memory backend, so here zero means
* "we haven't inited the backend memory region yet".
*/
return memory_region_size(&backend->mr) != 0;
}
MemoryRegion *
host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
{
return memory_region_size(&backend->mr) ? &backend->mr : NULL;
return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
}
void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)

View file

@ -40,6 +40,12 @@ static void machine_none_init(MachineState *mch)
memory_region_allocate_system_memory(ram, NULL, "ram", mch->ram_size);
memory_region_add_subregion(get_system_memory(), 0, ram);
}
if (mch->kernel_filename) {
error_report("The -kernel parameter is not supported "
"(use the generic 'loader' device instead).");
exit(1);
}
}
static void machine_none_machine_init(MachineClass *mc)

View file

@ -409,7 +409,7 @@ void qdev_prop_set_drive(DeviceState *dev, const char *name,
if (value) {
ref = blk_name(value);
if (!*ref) {
BlockDriverState *bs = blk_bs(value);
const BlockDriverState *bs = blk_bs(value);
if (bs) {
ref = bdrv_get_node_name(bs);
}

View file

@ -1010,7 +1010,8 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
object_property_set_str(OBJECT(dev), value, name, &error_abort);
}
void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value)
void qdev_prop_set_macaddr(DeviceState *dev, const char *name,
const uint8_t *value)
{
char str[2 * 6 + 5 + 1];
snprintf(str, sizeof(str), "%02x:%02x:%02x:%02x:%02x:%02x",

View file

@ -1037,13 +1037,6 @@ static bool device_get_hotplugged(Object *obj, Error **err)
return dev->hotplugged;
}
static void device_set_hotplugged(Object *obj, bool value, Error **err)
{
DeviceState *dev = DEVICE(obj);
dev->hotplugged = value;
}
static void device_initfn(Object *obj)
{
DeviceState *dev = DEVICE(obj);
@ -1063,7 +1056,7 @@ static void device_initfn(Object *obj)
object_property_add_bool(obj, "hotpluggable",
device_get_hotpluggable, NULL, NULL);
object_property_add_bool(obj, "hotplugged",
device_get_hotplugged, device_set_hotplugged,
device_get_hotplugged, NULL,
&error_abort);
class = object_get_class(OBJECT(dev));

View file

@ -595,6 +595,22 @@ static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce)
return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
}
static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
{
uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
return 1ULL << MIN(ce_agaw, VTD_MGAW);
}
/* Return true if IOVA passes range check, otherwise false. */
static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
return !(iova & ~(vtd_iova_limit(ce) - 1));
}
static const uint64_t vtd_paging_entry_rsvd_field[] = {
[0] = ~0ULL,
/* For not large page */
@ -630,13 +646,9 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
uint32_t level = vtd_get_level_from_context_entry(ce);
uint32_t offset;
uint64_t slpte;
uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
uint64_t access_right_check;
/* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
if (!vtd_iova_range_check(iova, ce)) {
VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@ -684,6 +696,135 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
}
}
typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
/**
* vtd_page_walk_level - walk over specific level for IOVA range
*
* @addr: base GPA addr to start the walk
* @start: IOVA range start address
* @end: IOVA range end address (start <= addr < end)
* @hook_fn: hook func to be called when detected page
* @private: private data to be passed into hook func
* @read: whether parent level has read permission
* @write: whether parent level has write permission
* @notify_unmap: whether we should notify invalid entries
*/
static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
uint64_t end, vtd_page_walk_hook hook_fn,
void *private, uint32_t level,
bool read, bool write, bool notify_unmap)
{
bool read_cur, write_cur, entry_valid;
uint32_t offset;
uint64_t slpte;
uint64_t subpage_size, subpage_mask;
IOMMUTLBEntry entry;
uint64_t iova = start;
uint64_t iova_next;
int ret = 0;
trace_vtd_page_walk_level(addr, level, start, end);
subpage_size = 1ULL << vtd_slpt_level_shift(level);
subpage_mask = vtd_slpt_level_page_mask(level);
while (iova < end) {
iova_next = (iova & subpage_mask) + subpage_size;
offset = vtd_iova_level_offset(iova, level);
slpte = vtd_get_slpte(addr, offset);
if (slpte == (uint64_t)-1) {
trace_vtd_page_walk_skip_read(iova, iova_next);
goto next;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
trace_vtd_page_walk_skip_reserve(iova, iova_next);
goto next;
}
/* Permissions are stacked with parents' */
read_cur = read && (slpte & VTD_SL_R);
write_cur = write && (slpte & VTD_SL_W);
/*
* As long as we have either read/write permission, this is a
* valid entry. The rule works for both page entries and page
* table entries.
*/
entry_valid = read_cur | write_cur;
if (vtd_is_last_slpte(slpte, level)) {
entry.target_as = &address_space_memory;
entry.iova = iova & subpage_mask;
/* NOTE: this is only meaningful if entry_valid == true */
entry.translated_addr = vtd_get_slpte_addr(slpte);
entry.addr_mask = ~subpage_mask;
entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
if (!entry_valid && !notify_unmap) {
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
entry.addr_mask, entry.perm);
if (hook_fn) {
ret = hook_fn(&entry, private);
if (ret < 0) {
return ret;
}
}
} else {
if (!entry_valid) {
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
MIN(iova_next, end), hook_fn, private,
level - 1, read_cur, write_cur,
notify_unmap);
if (ret < 0) {
return ret;
}
}
next:
iova = iova_next;
}
return 0;
}
/**
* vtd_page_walk - walk specific IOVA range, and call the hook
*
* @ce: context entry to walk upon
* @start: IOVA address to start the walk
* @end: IOVA range end address (start <= addr < end)
* @hook_fn: the hook that to be called for each detected area
* @private: private data for the hook function
*/
static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
vtd_page_walk_hook hook_fn, void *private,
bool notify_unmap)
{
dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
uint32_t level = vtd_get_level_from_context_entry(ce);
if (!vtd_iova_range_check(start, ce)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
if (!vtd_iova_range_check(end, ce)) {
/* Fix end so that it reaches the maximum */
end = vtd_iova_limit(ce);
}
return vtd_page_walk_level(addr, start, end, hook_fn, private,
level, true, true, notify_unmap);
}
/* Map a device to its corresponding domain (context-entry) */
static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
uint8_t devfn, VTDContextEntry *ce)
@ -898,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
s->intr_root, s->intr_size);
}
static void vtd_iommu_replay_all(IntelIOMMUState *s)
{
IntelIOMMUNotifierNode *node;
QLIST_FOREACH(node, &s->notifiers_list, next) {
memory_region_iommu_replay_all(&node->vtd_as->iommu);
}
}
static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
trace_vtd_inv_desc_cc_global();
@ -905,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
vtd_reset_context_cache(s);
}
/*
* From VT-d spec 6.5.2.1, a global context entry invalidation
* should be followed by a IOTLB global invalidation, so we should
* be safe even without this. Hoewever, let's replay the region as
* well to be safer, and go back here when we need finer tunes for
* VT-d emulation codes.
*/
vtd_iommu_replay_all(s);
}
@ -971,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
VTD_PCI_FUNC(devfn_it));
vtd_as->context_cache_entry.context_cache_gen = 0;
/*
* So a device is moving out of (or moving into) a
* domain, a replay() suites here to notify all the
* IOMMU_NOTIFIER_MAP registers about this change.
* This won't bring bad even if we have no such
* notifier registered - the IOMMU notification
* framework will skip MAP notifications if that
* happened.
*/
memory_region_iommu_replay_all(&vtd_as->iommu);
}
}
}
@ -1012,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
trace_vtd_iotlb_reset("global invalidation recved");
vtd_reset_iotlb(s);
vtd_iommu_replay_all(s);
}
static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
{
IntelIOMMUNotifierNode *node;
VTDContextEntry ce;
VTDAddressSpace *vtd_as;
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
&domain_id);
QLIST_FOREACH(node, &s->notifiers_list, next) {
vtd_as = node->vtd_as;
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
memory_region_iommu_replay_all(&vtd_as->iommu);
}
}
}
static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
void *private)
{
memory_region_notify_iommu((MemoryRegion *)private, *entry);
return 0;
}
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
uint8_t am)
{
IntelIOMMUNotifierNode *node;
VTDContextEntry ce;
int ret;
QLIST_FOREACH(node, &(s->notifiers_list), next) {
VTDAddressSpace *vtd_as = node->vtd_as;
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
vtd_page_invalidate_notify_hook,
(void *)&vtd_as->iommu, true);
}
}
}
static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
@ -1030,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
info.addr = addr;
info.mask = ~((1 << am) - 1);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
}
/* Flush IOTLB
@ -1151,9 +1361,49 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
}
static void vtd_switch_address_space(VTDAddressSpace *as)
{
assert(as);
trace_vtd_switch_address_space(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
VTD_PCI_FUNC(as->devfn),
as->iommu_state->dmar_enabled);
/* Turn off first then on the other */
if (as->iommu_state->dmar_enabled) {
memory_region_set_enabled(&as->sys_alias, false);
memory_region_set_enabled(&as->iommu, true);
} else {
memory_region_set_enabled(&as->iommu, false);
memory_region_set_enabled(&as->sys_alias, true);
}
}
static void vtd_switch_address_space_all(IntelIOMMUState *s)
{
GHashTableIter iter;
VTDBus *vtd_bus;
int i;
g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
if (!vtd_bus->dev_as[i]) {
continue;
}
vtd_switch_address_space(vtd_bus->dev_as[i]);
}
}
}
/* Handle Translation Enable/Disable */
static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
{
if (s->dmar_enabled == en) {
return;
}
VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));
if (en) {
@ -1168,6 +1418,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
/* Ok - report back to driver */
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
}
vtd_switch_address_space_all(s);
}
/* Handle Interrupt Remap Enable/Disable */
@ -1457,7 +1709,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
entry.iova = addr;
entry.perm = IOMMU_NONE;
entry.translated_addr = 0;
memory_region_notify_iommu(entry.target_as->root, entry);
memory_region_notify_iommu(&vtd_dev_as->iommu, entry);
done:
return true;
@ -2005,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
IOMMUNotifierFlag new)
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
IntelIOMMUNotifierNode *node = NULL;
IntelIOMMUNotifierNode *next_node = NULL;
if (new & IOMMU_NOTIFIER_MAP) {
error_report("Device at bus %s addr %02x.%d requires iommu "
"notifier which is currently not supported by "
"intel-iommu emulation",
vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn));
if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
error_report("We need to set cache_mode=1 for intel-iommu to enable "
"device assignment with IOMMU protection.");
exit(1);
}
if (old == IOMMU_NOTIFIER_NONE) {
node = g_malloc0(sizeof(*node));
node->vtd_as = vtd_as;
QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
return;
}
/* update notifier node with new flags */
QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
if (node->vtd_as == vtd_as) {
if (new == IOMMU_NOTIFIER_NONE) {
QLIST_REMOVE(node, next);
g_free(node);
}
return;
}
}
}
static const VMStateDescription vtd_vmstate = {
@ -2389,19 +2659,150 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
vtd_dev_as->devfn = (uint8_t)devfn;
vtd_dev_as->iommu_state = s;
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
/*
* Memory region relationships looks like (Address range shows
* only lower 32 bits to make it short in length...):
*
* |-----------------+-------------------+----------|
* | Name | Address range | Priority |
* |-----------------+-------------------+----------+
* | vtd_root | 00000000-ffffffff | 0 |
* | intel_iommu | 00000000-ffffffff | 1 |
* | vtd_sys_alias | 00000000-ffffffff | 1 |
* | intel_iommu_ir | fee00000-feefffff | 64 |
* |-----------------+-------------------+----------|
*
* We enable/disable DMAR by switching enablement for
* vtd_sys_alias and intel_iommu regions. IR region is always
* enabled.
*/
memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
&s->iommu_ops, "intel_iommu", UINT64_MAX);
&s->iommu_ops, "intel_iommu_dmar",
UINT64_MAX);
memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
"vtd_sys_alias", get_system_memory(),
0, memory_region_size(get_system_memory()));
memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
&vtd_mem_ir_ops, s, "intel_iommu_ir",
VTD_INTERRUPT_ADDR_SIZE);
memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
&vtd_dev_as->iommu_ir);
address_space_init(&vtd_dev_as->as,
&vtd_dev_as->iommu, name);
memory_region_init(&vtd_dev_as->root, OBJECT(s),
"vtd_root", UINT64_MAX);
memory_region_add_subregion_overlap(&vtd_dev_as->root,
VTD_INTERRUPT_ADDR_FIRST,
&vtd_dev_as->iommu_ir, 64);
address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
&vtd_dev_as->sys_alias, 1);
memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
&vtd_dev_as->iommu, 1);
vtd_switch_address_space(vtd_dev_as);
}
return vtd_dev_as;
}
/* Unmap the whole range in the notifier's scope. */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
IOMMUTLBEntry entry;
hwaddr size;
hwaddr start = n->start;
hwaddr end = n->end;
/*
* Note: all the codes in this function has a assumption that IOVA
* bits are no more than VTD_MGAW bits (which is restricted by
* VT-d spec), otherwise we need to consider overflow of 64 bits.
*/
if (end > VTD_ADDRESS_SIZE) {
/*
* Don't need to unmap regions that is bigger than the whole
* VT-d supported address space size
*/
end = VTD_ADDRESS_SIZE;
}
assert(start <= end);
size = end - start;
if (ctpop64(size) != 1) {
/*
* This size cannot format a correct mask. Let's enlarge it to
* suite the minimum available mask.
*/
int n = 64 - clz64(size);
if (n > VTD_MGAW) {
/* should not happen, but in case it happens, limit it */
n = VTD_MGAW;
}
size = 1ULL << n;
}
entry.target_as = &address_space_memory;
/* Adjust iova for the size */
entry.iova = n->start & ~(size - 1);
/* This field is meaningless for unmap */
entry.translated_addr = 0;
entry.perm = IOMMU_NONE;
entry.addr_mask = size - 1;
trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
VTD_PCI_FUNC(as->devfn),
entry.iova, size);
memory_region_notify_one(n, &entry);
}
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
{
IntelIOMMUNotifierNode *node;
VTDAddressSpace *vtd_as;
IOMMUNotifier *n;
QLIST_FOREACH(node, &s->notifiers_list, next) {
vtd_as = node->vtd_as;
IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
vtd_address_space_unmap(vtd_as, n);
}
}
}
static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
{
memory_region_notify_one((IOMMUNotifier *)private, entry);
return 0;
}
static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
{
VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
uint8_t bus_n = pci_bus_num(vtd_as->bus);
VTDContextEntry ce;
/*
* The replay can be triggered by either a invalidation or a newly
* created entry. No matter what, we release existing mappings
* (it means flushing caches for UNMAP-only registers).
*/
vtd_address_space_unmap(vtd_as, n);
if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn),
VTD_CONTEXT_ENTRY_DID(ce.hi),
ce.hi, ce.lo);
vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn));
}
return;
}
/* Do the initialization. It will also be called when reset, so pay
* attention when adding new initialization stuff.
*/
@ -2416,6 +2817,7 @@ static void vtd_init(IntelIOMMUState *s)
s->iommu_ops.translate = vtd_iommu_translate;
s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
s->iommu_ops.replay = vtd_iommu_replay;
s->root = 0;
s->root_extended = false;
s->dmar_enabled = false;
@ -2511,6 +2913,11 @@ static void vtd_reset(DeviceState *dev)
VTD_DPRINTF(GENERAL, "");
vtd_init(s);
/*
* When device reset, throw away all mappings and external caches
*/
vtd_address_space_unmap_all(s);
}
static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
@ -2574,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
return;
}
QLIST_INIT(&s->notifiers_list);
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);

View file

@ -197,6 +197,7 @@
#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
#define VTD_MGAW 39 /* Maximum Guest Address Width */
#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW)
#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16)
#define VTD_MAMV 18ULL
#define VTD_CAP_MAMV (VTD_MAMV << 48)

View file

@ -4,7 +4,6 @@
x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
# hw/i386/intel_iommu.c
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
@ -30,6 +29,15 @@ vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32
vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
vtd_fault_disabled(void) "Fault processing disabled for context entry"
vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64
vtd_page_walk_one(uint32_t level, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "detected page level 0x%"PRIx32" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read"
vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty"
vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
# hw/i386/amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32

View file

@ -478,8 +478,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
giommu->iommu_offset = section->offset_within_address_space -
section->offset_within_region;
giommu->container = container;
giommu->n.notify = vfio_iommu_map_notify;
giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL;
llend = int128_add(int128_make64(section->offset_within_region),
section->size);
llend = int128_sub(llend, int128_one());
iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
IOMMU_NOTIFIER_ALL,
section->offset_within_region,
int128_get64(llend));
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
@ -550,7 +555,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
VFIOGuestIOMMU *giommu;
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
if (giommu->iommu == section->mr) {
if (giommu->iommu == section->mr &&
giommu->n.start == section->offset_within_region) {
memory_region_unregister_iommu_notifier(giommu->iommu,
&giommu->n);
QLIST_REMOVE(giommu, giommu_next);

View file

@ -736,14 +736,20 @@ static void vhost_iommu_region_add(MemoryListener *listener,
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
iommu_listener);
struct vhost_iommu *iommu;
Int128 end;
if (!memory_region_is_iommu(section->mr)) {
return;
}
iommu = g_malloc0(sizeof(*iommu));
iommu->n.notify = vhost_iommu_unmap_notify;
iommu->n.notifier_flags = IOMMU_NOTIFIER_UNMAP;
end = int128_add(int128_make64(section->offset_within_region),
section->size);
end = int128_sub(end, int128_one());
iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
IOMMU_NOTIFIER_UNMAP,
section->offset_within_region,
int128_get64(end));
iommu->mr = section->mr;
iommu->iommu_offset = section->offset_within_address_space -
section->offset_within_region;
@ -765,7 +771,8 @@ static void vhost_iommu_region_del(MemoryListener *listener,
}
QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
if (iommu->mr == section->mr) {
if (iommu->mr == section->mr &&
iommu->n.start == section->offset_within_region) {
memory_region_unregister_iommu_notifier(iommu->mr,
&iommu->n);
QLIST_REMOVE(iommu, iommu_next);

View file

@ -55,6 +55,8 @@ typedef enum {
IOMMU_RW = 3,
} IOMMUAccessFlags;
#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
struct IOMMUTLBEntry {
AddressSpace *target_as;
hwaddr iova;
@ -77,13 +79,30 @@ typedef enum {
#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP)
struct IOMMUNotifier;
typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier,
IOMMUTLBEntry *data);
struct IOMMUNotifier {
void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data);
IOMMUNotify notify;
IOMMUNotifierFlag notifier_flags;
/* Notify for address space range start <= addr <= end */
hwaddr start;
hwaddr end;
QLIST_ENTRY(IOMMUNotifier) node;
};
typedef struct IOMMUNotifier IOMMUNotifier;
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end)
{
n->notify = fn;
n->notifier_flags = flags;
n->start = start;
n->end = end;
}
/* New-style MMIO accessors can indicate that the transaction failed.
* A zero (MEMTX_OK) response means success; anything else is a failure
* of some kind. The memory subsystem will bitwise-OR together results
@ -174,6 +193,8 @@ struct MemoryRegionIOMMUOps {
void (*notify_flag_changed)(MemoryRegion *iommu,
IOMMUNotifierFlag old_flags,
IOMMUNotifierFlag new_flags);
/* Set this up to provide customized IOMMU replay function */
void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier);
};
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@ -222,6 +243,9 @@ struct MemoryRegion {
IOMMUNotifierFlag iommu_notify_flags;
};
#define IOMMU_NOTIFIER_FOREACH(n, mr) \
QLIST_FOREACH((n), &(mr)->iommu_notify, node)
/**
* MemoryListener: callbacks structure for updates to the physical memory map
*
@ -667,6 +691,21 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
void memory_region_notify_iommu(MemoryRegion *mr,
IOMMUTLBEntry entry);
/**
* memory_region_notify_one: notify a change in an IOMMU translation
* entry to a single notifier
*
* This works just like memory_region_notify_iommu(), but it only
* notifies a specific notifier, not all of them.
*
* @notifier: the notifier to be notified
* @entry: the new entry in the IOMMU translation table. The entry
* replaces all old entries for the same virtual I/O address range.
* Deleted entries have .@perm == 0.
*/
void memory_region_notify_one(IOMMUNotifier *notifier,
IOMMUTLBEntry *entry);
/**
* memory_region_register_iommu_notifier: register a notifier for changes to
* IOMMU translation entries.
@ -692,6 +731,14 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr,
void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
bool is_write);
/**
* memory_region_iommu_replay_all: replay existing IOMMU translations
* to all the notifiers registered.
*
* @mr: the memory region to observe
*/
void memory_region_iommu_replay_all(MemoryRegion *mr);
/**
* memory_region_unregister_iommu_notifier: unregister a notifier for
* changes to IOMMU translation entries.

View file

@ -63,6 +63,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
typedef struct VTDIrq VTDIrq;
typedef struct VTD_MSIMessage VTD_MSIMessage;
typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode;
/* Context-Entry */
struct VTDContextEntry {
@ -83,6 +84,8 @@ struct VTDAddressSpace {
uint8_t devfn;
AddressSpace as;
MemoryRegion iommu;
MemoryRegion root;
MemoryRegion sys_alias;
MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */
IntelIOMMUState *iommu_state;
VTDContextCacheEntry context_cache_entry;
@ -247,6 +250,11 @@ struct VTD_MSIMessage {
/* When IR is enabled, all MSI/MSI-X data bits should be zero */
#define VTD_IR_MSI_DATA (0)
struct IntelIOMMUNotifierNode {
VTDAddressSpace *vtd_as;
QLIST_ENTRY(IntelIOMMUNotifierNode) next;
};
/* The iommu (DMAR) device state struct */
struct IntelIOMMUState {
X86IOMMUState x86_iommu;
@ -284,6 +292,8 @@ struct IntelIOMMUState {
MemoryRegionIOMMUOps iommu_ops;
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
/* list of registered notifiers */
QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list;
/* interrupt remapping */
bool intr_enabled; /* Whether guest enabled IR */

View file

@ -188,7 +188,8 @@ void qdev_prop_set_chr(DeviceState *dev, const char *name, Chardev *value);
void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value);
void qdev_prop_set_drive(DeviceState *dev, const char *name,
BlockBackend *value, Error **errp);
void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value);
void qdev_prop_set_macaddr(DeviceState *dev, const char *name,
const uint8_t *value);
void qdev_prop_set_enum(DeviceState *dev, const char *name, int value);
/* FIXME: Remove opaque pointer properties. */
void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value);

View file

@ -62,6 +62,7 @@ struct HostMemoryBackend {
MemoryRegion mr;
};
bool host_memory_backend_mr_inited(HostMemoryBackend *backend);
MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend,
Error **errp);

View file

@ -1583,7 +1583,7 @@ static void memory_region_update_iommu_notify_flags(MemoryRegion *mr)
IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE;
IOMMUNotifier *iommu_notifier;
QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) {
flags |= iommu_notifier->notifier_flags;
}
@ -1606,6 +1606,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr,
/* We need to register for at least one bitfield */
assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
assert(n->start <= n->end);
QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
memory_region_update_iommu_notify_flags(mr);
}
@ -1625,6 +1626,12 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
hwaddr addr, granularity;
IOMMUTLBEntry iotlb;
/* If the IOMMU has its own replay callback, override */
if (mr->iommu_ops->replay) {
mr->iommu_ops->replay(mr, n);
return;
}
granularity = memory_region_iommu_get_min_page_size(mr);
for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
@ -1641,6 +1648,15 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
}
}
void memory_region_iommu_replay_all(MemoryRegion *mr)
{
IOMMUNotifier *notifier;
IOMMU_NOTIFIER_FOREACH(notifier, mr) {
memory_region_iommu_replay(mr, notifier, false);
}
}
void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n)
{
@ -1652,24 +1668,40 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
memory_region_update_iommu_notify_flags(mr);
}
void memory_region_notify_iommu(MemoryRegion *mr,
IOMMUTLBEntry entry)
void memory_region_notify_one(IOMMUNotifier *notifier,
IOMMUTLBEntry *entry)
{
IOMMUNotifier *iommu_notifier;
IOMMUNotifierFlag request_flags;
assert(memory_region_is_iommu(mr));
/*
* Skip the notification if the notification does not overlap
* with registered range.
*/
if (notifier->start > entry->iova + entry->addr_mask + 1 ||
notifier->end < entry->iova) {
return;
}
if (entry.perm & IOMMU_RW) {
if (entry->perm & IOMMU_RW) {
request_flags = IOMMU_NOTIFIER_MAP;
} else {
request_flags = IOMMU_NOTIFIER_UNMAP;
}
QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
if (iommu_notifier->notifier_flags & request_flags) {
iommu_notifier->notify(iommu_notifier, &entry);
}
if (notifier->notifier_flags & request_flags) {
notifier->notify(notifier, entry);
}
}
void memory_region_notify_iommu(MemoryRegion *mr,
IOMMUTLBEntry entry)
{
IOMMUNotifier *iommu_notifier;
assert(memory_region_is_iommu(mr));
IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) {
memory_region_notify_one(iommu_notifier, &entry);
}
}