exec: separate sections and nodes per address space

Every address space has its own nodes and sections, but
they all share the same global arrays of nodes/sections.

This limits the number of devices that can be attached
to the guest to 20-30. It happens because:
 - The sections array is limited to 2^12 entries.
 - The main memory has at least 100 sections.
 - Each device address space is actually an alias to
   main memory, multiplying its number of nodes/sections.

Remove the limitation by using separate arrays of
nodes and sections for each address space.

Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Marcel Apfelbaum 2013-12-01 14:02:23 +02:00 committed by Michael S. Tsirkin
parent 6307d974f9
commit 53cb28cbfe

155
exec.c
View file

@ -103,13 +103,21 @@ struct PhysPageEntry {
typedef PhysPageEntry Node[P_L2_SIZE]; typedef PhysPageEntry Node[P_L2_SIZE];
typedef struct PhysPageMap {
unsigned sections_nb;
unsigned sections_nb_alloc;
unsigned nodes_nb;
unsigned nodes_nb_alloc;
Node *nodes;
MemoryRegionSection *sections;
} PhysPageMap;
struct AddressSpaceDispatch { struct AddressSpaceDispatch {
/* This is a multi-level map on the physical address space. /* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections. * The bottom level has pointers to MemoryRegionSections.
*/ */
PhysPageEntry phys_map; PhysPageEntry phys_map;
Node *nodes; PhysPageMap map;
MemoryRegionSection *sections;
AddressSpace *as; AddressSpace *as;
}; };
@ -126,18 +134,6 @@ typedef struct subpage_t {
#define PHYS_SECTION_ROM 2 #define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3 #define PHYS_SECTION_WATCH 3
typedef struct PhysPageMap {
unsigned sections_nb;
unsigned sections_nb_alloc;
unsigned nodes_nb;
unsigned nodes_nb_alloc;
Node *nodes;
MemoryRegionSection *sections;
} PhysPageMap;
static PhysPageMap *prev_map;
static PhysPageMap next_map;
static void io_mem_init(void); static void io_mem_init(void);
static void memory_map_init(void); static void memory_map_init(void);
@ -146,35 +142,32 @@ static MemoryRegion io_mem_watch;
#if !defined(CONFIG_USER_ONLY) #if !defined(CONFIG_USER_ONLY)
static void phys_map_node_reserve(unsigned nodes) static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{ {
if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) { if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2, map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
16); map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc, map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
next_map.nodes_nb + nodes);
next_map.nodes = g_renew(Node, next_map.nodes,
next_map.nodes_nb_alloc);
} }
} }
static uint32_t phys_map_node_alloc(void) static uint32_t phys_map_node_alloc(PhysPageMap *map)
{ {
unsigned i; unsigned i;
uint32_t ret; uint32_t ret;
ret = next_map.nodes_nb++; ret = map->nodes_nb++;
assert(ret != PHYS_MAP_NODE_NIL); assert(ret != PHYS_MAP_NODE_NIL);
assert(ret != next_map.nodes_nb_alloc); assert(ret != map->nodes_nb_alloc);
for (i = 0; i < P_L2_SIZE; ++i) { for (i = 0; i < P_L2_SIZE; ++i) {
next_map.nodes[ret][i].skip = 1; map->nodes[ret][i].skip = 1;
next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
} }
return ret; return ret;
} }
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
hwaddr *nb, uint16_t leaf, hwaddr *index, hwaddr *nb, uint16_t leaf,
int level) int level)
{ {
PhysPageEntry *p; PhysPageEntry *p;
@ -182,8 +175,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
hwaddr step = (hwaddr)1 << (level * P_L2_BITS); hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) { if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
lp->ptr = phys_map_node_alloc(); lp->ptr = phys_map_node_alloc(map);
p = next_map.nodes[lp->ptr]; p = map->nodes[lp->ptr];
if (level == 0) { if (level == 0) {
for (i = 0; i < P_L2_SIZE; i++) { for (i = 0; i < P_L2_SIZE; i++) {
p[i].skip = 0; p[i].skip = 0;
@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
} }
} }
} else { } else {
p = next_map.nodes[lp->ptr]; p = map->nodes[lp->ptr];
} }
lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)]; lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
@ -202,7 +195,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
*index += step; *index += step;
*nb -= step; *nb -= step;
} else { } else {
phys_page_set_level(lp, index, nb, leaf, level - 1); phys_page_set_level(map, lp, index, nb, leaf, level - 1);
} }
++lp; ++lp;
} }
@ -213,9 +206,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
uint16_t leaf) uint16_t leaf)
{ {
/* Wildly overreserve - it doesn't matter much. */ /* Wildly overreserve - it doesn't matter much. */
phys_map_node_reserve(3 * P_L2_LEVELS); phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
} }
/* Compact a non leaf page entry. Simply detect that the entry has a single child, /* Compact a non leaf page entry. Simply detect that the entry has a single child,
@ -276,7 +269,7 @@ static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
DECLARE_BITMAP(compacted, nodes_nb); DECLARE_BITMAP(compacted, nodes_nb);
if (d->phys_map.skip) { if (d->phys_map.skip) {
phys_page_compact(&d->phys_map, d->nodes, compacted); phys_page_compact(&d->phys_map, d->map.nodes, compacted);
} }
} }
@ -317,10 +310,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
MemoryRegionSection *section; MemoryRegionSection *section;
subpage_t *subpage; subpage_t *subpage;
section = phys_page_find(d->phys_map, addr, d->nodes, d->sections); section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
if (resolve_subpage && section->mr->subpage) { if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem); subpage = container_of(section->mr, subpage_t, iomem);
section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]]; section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
} }
return section; return section;
} }
@ -788,7 +781,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
iotlb |= PHYS_SECTION_ROM; iotlb |= PHYS_SECTION_ROM;
} }
} else { } else {
iotlb = section - address_space_memory.dispatch->sections; iotlb = section - address_space_memory.dispatch->map.sections;
iotlb += xlat; iotlb += xlat;
} }
@ -827,23 +820,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
phys_mem_alloc = alloc; phys_mem_alloc = alloc;
} }
static uint16_t phys_section_add(MemoryRegionSection *section) static uint16_t phys_section_add(PhysPageMap *map,
MemoryRegionSection *section)
{ {
/* The physical section number is ORed with a page-aligned /* The physical section number is ORed with a page-aligned
* pointer to produce the iotlb entries. Thus it should * pointer to produce the iotlb entries. Thus it should
* never overflow into the page-aligned value. * never overflow into the page-aligned value.
*/ */
assert(next_map.sections_nb < TARGET_PAGE_SIZE); assert(map->sections_nb < TARGET_PAGE_SIZE);
if (next_map.sections_nb == next_map.sections_nb_alloc) { if (map->sections_nb == map->sections_nb_alloc) {
next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2, map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
16); map->sections = g_renew(MemoryRegionSection, map->sections,
next_map.sections = g_renew(MemoryRegionSection, next_map.sections, map->sections_nb_alloc);
next_map.sections_nb_alloc);
} }
next_map.sections[next_map.sections_nb] = *section; map->sections[map->sections_nb] = *section;
memory_region_ref(section->mr); memory_region_ref(section->mr);
return next_map.sections_nb++; return map->sections_nb++;
} }
static void phys_section_destroy(MemoryRegion *mr) static void phys_section_destroy(MemoryRegion *mr)
@ -865,7 +858,6 @@ static void phys_sections_free(PhysPageMap *map)
} }
g_free(map->sections); g_free(map->sections);
g_free(map->nodes); g_free(map->nodes);
g_free(map);
} }
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section) static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
@ -874,7 +866,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
hwaddr base = section->offset_within_address_space hwaddr base = section->offset_within_address_space
& TARGET_PAGE_MASK; & TARGET_PAGE_MASK;
MemoryRegionSection *existing = phys_page_find(d->phys_map, base, MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
next_map.nodes, next_map.sections); d->map.nodes, d->map.sections);
MemoryRegionSection subsection = { MemoryRegionSection subsection = {
.offset_within_address_space = base, .offset_within_address_space = base,
.size = int128_make64(TARGET_PAGE_SIZE), .size = int128_make64(TARGET_PAGE_SIZE),
@ -887,13 +879,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
subpage = subpage_init(d->as, base); subpage = subpage_init(d->as, base);
subsection.mr = &subpage->iomem; subsection.mr = &subpage->iomem;
phys_page_set(d, base >> TARGET_PAGE_BITS, 1, phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
phys_section_add(&subsection)); phys_section_add(&d->map, &subsection));
} else { } else {
subpage = container_of(existing->mr, subpage_t, iomem); subpage = container_of(existing->mr, subpage_t, iomem);
} }
start = section->offset_within_address_space & ~TARGET_PAGE_MASK; start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
end = start + int128_get64(section->size) - 1; end = start + int128_get64(section->size) - 1;
subpage_register(subpage, start, end, phys_section_add(section)); subpage_register(subpage, start, end,
phys_section_add(&d->map, section));
} }
@ -901,7 +894,7 @@ static void register_multipage(AddressSpaceDispatch *d,
MemoryRegionSection *section) MemoryRegionSection *section)
{ {
hwaddr start_addr = section->offset_within_address_space; hwaddr start_addr = section->offset_within_address_space;
uint16_t section_index = phys_section_add(section); uint16_t section_index = phys_section_add(&d->map, section);
uint64_t num_pages = int128_get64(int128_rshift(section->size, uint64_t num_pages = int128_get64(int128_rshift(section->size,
TARGET_PAGE_BITS)); TARGET_PAGE_BITS));
@ -1720,7 +1713,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
return mmio; return mmio;
} }
static uint16_t dummy_section(MemoryRegion *mr) static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{ {
MemoryRegionSection section = { MemoryRegionSection section = {
.mr = mr, .mr = mr,
@ -1729,12 +1722,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
.size = int128_2_64(), .size = int128_2_64(),
}; };
return phys_section_add(&section); return phys_section_add(map, &section);
} }
MemoryRegion *iotlb_to_region(hwaddr index) MemoryRegion *iotlb_to_region(hwaddr index)
{ {
return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr; return address_space_memory.dispatch->map.sections[
index & ~TARGET_PAGE_MASK].mr;
} }
static void io_mem_init(void) static void io_mem_init(void)
@ -1751,7 +1745,17 @@ static void io_mem_init(void)
static void mem_begin(MemoryListener *listener) static void mem_begin(MemoryListener *listener)
{ {
AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener); AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1); AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
uint16_t n;
n = dummy_section(&d->map, &io_mem_unassigned);
assert(n == PHYS_SECTION_UNASSIGNED);
n = dummy_section(&d->map, &io_mem_notdirty);
assert(n == PHYS_SECTION_NOTDIRTY);
n = dummy_section(&d->map, &io_mem_rom);
assert(n == PHYS_SECTION_ROM);
n = dummy_section(&d->map, &io_mem_watch);
assert(n == PHYS_SECTION_WATCH);
d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
d->as = as; d->as = as;
@ -1764,39 +1768,14 @@ static void mem_commit(MemoryListener *listener)
AddressSpaceDispatch *cur = as->dispatch; AddressSpaceDispatch *cur = as->dispatch;
AddressSpaceDispatch *next = as->next_dispatch; AddressSpaceDispatch *next = as->next_dispatch;
next->nodes = next_map.nodes; phys_page_compact_all(next, next->map.nodes_nb);
next->sections = next_map.sections;
phys_page_compact_all(next, next_map.nodes_nb);
as->dispatch = next; as->dispatch = next;
g_free(cur);
}
static void core_begin(MemoryListener *listener) if (cur) {
{ phys_sections_free(&cur->map);
uint16_t n; g_free(cur);
}
prev_map = g_new(PhysPageMap, 1);
*prev_map = next_map;
memset(&next_map, 0, sizeof(next_map));
n = dummy_section(&io_mem_unassigned);
assert(n == PHYS_SECTION_UNASSIGNED);
n = dummy_section(&io_mem_notdirty);
assert(n == PHYS_SECTION_NOTDIRTY);
n = dummy_section(&io_mem_rom);
assert(n == PHYS_SECTION_ROM);
n = dummy_section(&io_mem_watch);
assert(n == PHYS_SECTION_WATCH);
}
/* This listener's commit run after the other AddressSpaceDispatch listeners'.
* All AddressSpaceDispatch instances have switched to the next map.
*/
static void core_commit(MemoryListener *listener)
{
phys_sections_free(prev_map);
} }
static void tcg_commit(MemoryListener *listener) static void tcg_commit(MemoryListener *listener)
@ -1824,8 +1803,6 @@ static void core_log_global_stop(MemoryListener *listener)
} }
static MemoryListener core_memory_listener = { static MemoryListener core_memory_listener = {
.begin = core_begin,
.commit = core_commit,
.log_global_start = core_log_global_start, .log_global_start = core_log_global_start,
.log_global_stop = core_log_global_stop, .log_global_stop = core_log_global_stop,
.priority = 1, .priority = 1,