* submodule cleanups (Philippe, myself)

* tiny step towards a usable preconfig mode (myself)
 * Kconfig and LOCK_GUARD cleanups (Philippe)
 * new x86 CPUID feature (Yang Zhong)
 * "-object qtest" support (myself)
 * Dirty ring support for KVM (Peter)
 * Fixes for 6.0 command line parsing breakage (myself)
 * Fix for macOS 11.3 SDK (Katsuhiro)
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmCuRAQUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroOL6Qf/bUjQNAUc2QQJya1lu8TEf1o4vjkK
 C3EzFPVAj+m2O3OZOGEHcTh8+lDSzBeE2gB3bt4AD+KvFbQGXhLM3gMu/Ztymv8m
 3rVEe/NxNyq/CgC307GIwF3in7rEzjH0+WHaOuoU340e3Po1FA7s20VnMysVxxng
 4Pf4m4Y0k0eq022HgqZ/r/kbnINxDHagmzuyiFARkt8ooiuj4NyOMW7UKMk3fBvY
 MLMPsBe3imWmVnkOF0n/qJ+Svbtx15iLgGIIggshy3rmPereUpIQYaJ9FS6jcXO2
 YHuYDc2aGelMU84r+x+9UQra6auzJfc4UbylOsGjopCeFG2aU8rLMphvpw==
 =UQwU
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging

* submodule cleanups (Philippe, myself)
* tiny step towards a usable preconfig mode (myself)
* Kconfig and LOCK_GUARD cleanups (Philippe)
* new x86 CPUID feature (Yang Zhong)
* "-object qtest" support (myself)
* Dirty ring support for KVM (Peter)
* Fixes for 6.0 command line parsing breakage (myself)
* Fix for macOS 11.3 SDK (Katsuhiro)

# gpg: Signature made Wed 26 May 2021 13:50:12 BST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini-gitlab/tags/for-upstream: (28 commits)
  gitlab-ci: use --meson=git for CFI jobs
  hw/scsi: Fix sector translation bug in scsi_unmap_complete_noio
  configure: Avoid error messages about missing *-config-*.h files
  doc: Add notes about -mon option mode=control argument.
  qemu-config: load modules when instantiating option groups
  vl: allow not specifying size in -m when using -M memory-backend
  replication: move include out of root directory
  remove qemu-options* from root directory
  meson: Set implicit_include_directories to false
  tests/qtest/fuzz: Fix build failure
  KVM: Dirty ring support
  KVM: Disable manual dirty log when dirty ring enabled
  KVM: Add dirty-ring-size property
  KVM: Cache kvm slot dirty bitmap size
  KVM: Simplify dirty log sync in kvm_set_phys_mem
  KVM: Provide helper to sync dirty bitmap from slot to ramblock
  KVM: Provide helper to get kvm dirty log
  KVM: Create the KVMSlot dirty bitmap on flag changes
  KVM: Use a big lock to replace per-kml slots_lock
  memory: Introduce log_sync_global() to memory listener
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-05-26 21:05:35 +01:00
commit 8385235ba9
37 changed files with 924 additions and 227 deletions

View file

@ -27,9 +27,9 @@ include:
- cd build
- if test -n "$TARGETS";
then
../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=internal} $CONFIGURE_ARGS --target-list="$TARGETS" ;
../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=git} $CONFIGURE_ARGS --target-list="$TARGETS" ;
else
../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=internal} $CONFIGURE_ARGS ;
../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=git} $CONFIGURE_ARGS ;
fi || { cat config.log meson-logs/meson-log.txt && exit 1; }
- if test -n "$LD_JOBS";
then

View file

@ -48,9 +48,11 @@ Makefile: .git-submodule-status
.PHONY: git-submodule-update
git-submodule-update:
ifneq ($(GIT_SUBMODULES_ACTION),ignore)
$(call quiet-command, \
(GIT="$(GIT)" "$(SRC_PATH)/scripts/git-submodule.sh" $(GIT_SUBMODULES_ACTION) $(GIT_SUBMODULES)), \
"GIT","$(GIT_SUBMODULES)")
endif
# 0. ensure the build tree is okay

View file

@ -15,6 +15,7 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <poll.h>
#include <linux/kvm.h>
@ -78,6 +79,25 @@ struct KVMParkedVcpu {
QLIST_ENTRY(KVMParkedVcpu) node;
};
enum KVMDirtyRingReaperState {
KVM_DIRTY_RING_REAPER_NONE = 0,
/* The reaper is sleeping */
KVM_DIRTY_RING_REAPER_WAIT,
/* The reaper is reaping for dirty pages */
KVM_DIRTY_RING_REAPER_REAPING,
};
/*
* KVM reaper instance, responsible for collecting the KVM dirty bits
* via the dirty ring.
*/
struct KVMDirtyRingReaper {
/* The reaper thread */
QemuThread reaper_thr;
volatile uint64_t reaper_iteration; /* iteration number of reaper thr */
volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */
};
struct KVMState
{
AccelState parent_obj;
@ -126,6 +146,9 @@ struct KVMState
KVMMemoryListener *ml;
AddressSpace *as;
} *as;
uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */
uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */
struct KVMDirtyRingReaper reaper;
};
KVMState *kvm_state;
@ -172,8 +195,12 @@ typedef struct KVMResampleFd KVMResampleFd;
static QLIST_HEAD(, KVMResampleFd) kvm_resample_fd_list =
QLIST_HEAD_INITIALIZER(kvm_resample_fd_list);
#define kvm_slots_lock(kml) qemu_mutex_lock(&(kml)->slots_lock)
#define kvm_slots_unlock(kml) qemu_mutex_unlock(&(kml)->slots_lock)
static QemuMutex kml_slots_lock;
#define kvm_slots_lock() qemu_mutex_lock(&kml_slots_lock)
#define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock)
static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
static inline void kvm_resample_fd_remove(int gsi)
{
@ -239,9 +266,9 @@ bool kvm_has_free_slot(MachineState *ms)
bool result;
KVMMemoryListener *kml = &s->memory_listener;
kvm_slots_lock(kml);
kvm_slots_lock();
result = !!kvm_get_free_slot(kml);
kvm_slots_unlock(kml);
kvm_slots_unlock();
return result;
}
@ -307,7 +334,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
KVMMemoryListener *kml = &s->memory_listener;
int i, ret = 0;
kvm_slots_lock(kml);
kvm_slots_lock();
for (i = 0; i < s->nr_slots; i++) {
KVMSlot *mem = &kml->slots[i];
@ -317,7 +344,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
break;
}
}
kvm_slots_unlock(kml);
kvm_slots_unlock();
return ret;
}
@ -383,6 +410,13 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
goto err;
}
if (cpu->kvm_dirty_gfns) {
ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_size);
if (ret < 0) {
goto err;
}
}
vcpu = g_malloc0(sizeof(*vcpu));
vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
vcpu->kvm_fd = cpu->kvm_fd;
@ -459,6 +493,19 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
(void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
}
if (s->kvm_dirty_ring_size) {
/* Use MAP_SHARED to share pages with the kernel */
cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_size,
PROT_READ | PROT_WRITE, MAP_SHARED,
cpu->kvm_fd,
PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
if (cpu->kvm_dirty_gfns == MAP_FAILED) {
ret = -errno;
DPRINTF("mmap'ing vcpu dirty gfns failed: %d\n", ret);
goto err;
}
}
ret = kvm_arch_init_vcpu(cpu);
if (ret < 0) {
error_setg_errno(errp, -ret,
@ -498,6 +545,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem,
return 0;
}
kvm_slot_init_dirty_bitmap(mem);
return kvm_set_user_memory_region(kml, mem, false);
}
@ -513,7 +561,7 @@ static int kvm_section_update_flags(KVMMemoryListener *kml,
return 0;
}
kvm_slots_lock(kml);
kvm_slots_lock();
while (size && !ret) {
slot_size = MIN(kvm_max_slot_size, size);
@ -529,7 +577,7 @@ static int kvm_section_update_flags(KVMMemoryListener *kml,
}
out:
kvm_slots_unlock(kml);
kvm_slots_unlock();
return ret;
}
@ -568,22 +616,28 @@ static void kvm_log_stop(MemoryListener *listener,
}
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
unsigned long *bitmap)
static void kvm_slot_sync_dirty_pages(KVMSlot *slot)
{
ram_addr_t start = section->offset_within_region +
memory_region_get_ram_addr(section->mr);
ram_addr_t pages = int128_get64(section->size) / qemu_real_host_page_size;
ram_addr_t start = slot->ram_start_offset;
ram_addr_t pages = slot->memory_size / qemu_real_host_page_size;
cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
return 0;
cpu_physical_memory_set_dirty_lebitmap(slot->dirty_bmap, start, pages);
}
static void kvm_slot_reset_dirty_pages(KVMSlot *slot)
{
memset(slot->dirty_bmap, 0, slot->dirty_bmap_size);
}
#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
/* Allocate the dirty bitmap for a slot */
static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem)
static void kvm_slot_init_dirty_bitmap(KVMSlot *mem)
{
if (!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) || mem->dirty_bmap) {
return;
}
/*
* XXX bad kernel interface alert
* For dirty bitmap, kernel allocates array of size aligned to
@ -604,6 +658,196 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem)
hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size,
/*HOST_LONG_BITS*/ 64) / 8;
mem->dirty_bmap = g_malloc0(bitmap_size);
mem->dirty_bmap_size = bitmap_size;
}
/*
* Sync dirty bitmap from kernel to KVMSlot.dirty_bmap, return true if
* succeeded, false otherwise
*/
static bool kvm_slot_get_dirty_log(KVMState *s, KVMSlot *slot)
{
struct kvm_dirty_log d = {};
int ret;
d.dirty_bitmap = slot->dirty_bmap;
d.slot = slot->slot | (slot->as_id << 16);
ret = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d);
if (ret == -ENOENT) {
/* kernel does not have dirty bitmap in this slot */
ret = 0;
}
if (ret) {
error_report_once("%s: KVM_GET_DIRTY_LOG failed with %d",
__func__, ret);
}
return ret == 0;
}
/* Should be with all slots_lock held for the address spaces. */
static void kvm_dirty_ring_mark_page(KVMState *s, uint32_t as_id,
uint32_t slot_id, uint64_t offset)
{
KVMMemoryListener *kml;
KVMSlot *mem;
if (as_id >= s->nr_as) {
return;
}
kml = s->as[as_id].ml;
mem = &kml->slots[slot_id];
if (!mem->memory_size || offset >=
(mem->memory_size / qemu_real_host_page_size)) {
return;
}
set_bit(offset, mem->dirty_bmap);
}
static bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
{
return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
}
static void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
{
gfn->flags = KVM_DIRTY_GFN_F_RESET;
}
/*
* Should be with all slots_lock held for the address spaces. It returns the
* dirty page we've collected on this dirty ring.
*/
static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu)
{
struct kvm_dirty_gfn *dirty_gfns = cpu->kvm_dirty_gfns, *cur;
uint32_t ring_size = s->kvm_dirty_ring_size;
uint32_t count = 0, fetch = cpu->kvm_fetch_index;
assert(dirty_gfns && ring_size);
trace_kvm_dirty_ring_reap_vcpu(cpu->cpu_index);
while (true) {
cur = &dirty_gfns[fetch % ring_size];
if (!dirty_gfn_is_dirtied(cur)) {
break;
}
kvm_dirty_ring_mark_page(s, cur->slot >> 16, cur->slot & 0xffff,
cur->offset);
dirty_gfn_set_collected(cur);
trace_kvm_dirty_ring_page(cpu->cpu_index, fetch, cur->offset);
fetch++;
count++;
}
cpu->kvm_fetch_index = fetch;
return count;
}
/* Must be with slots_lock held */
static uint64_t kvm_dirty_ring_reap_locked(KVMState *s)
{
int ret;
CPUState *cpu;
uint64_t total = 0;
int64_t stamp;
stamp = get_clock();
CPU_FOREACH(cpu) {
total += kvm_dirty_ring_reap_one(s, cpu);
}
if (total) {
ret = kvm_vm_ioctl(s, KVM_RESET_DIRTY_RINGS);
assert(ret == total);
}
stamp = get_clock() - stamp;
if (total) {
trace_kvm_dirty_ring_reap(total, stamp / 1000);
}
return total;
}
/*
* Currently for simplicity, we must hold BQL before calling this. We can
* consider to drop the BQL if we're clear with all the race conditions.
*/
static uint64_t kvm_dirty_ring_reap(KVMState *s)
{
uint64_t total;
/*
* We need to lock all kvm slots for all address spaces here,
* because:
*
* (1) We need to mark dirty for dirty bitmaps in multiple slots
* and for tons of pages, so it's better to take the lock here
* once rather than once per page. And more importantly,
*
* (2) We must _NOT_ publish dirty bits to the other threads
* (e.g., the migration thread) via the kvm memory slot dirty
* bitmaps before correctly re-protect those dirtied pages.
* Otherwise we can have potential risk of data corruption if
* the page data is read in the other thread before we do
* reset below.
*/
kvm_slots_lock();
total = kvm_dirty_ring_reap_locked(s);
kvm_slots_unlock();
return total;
}
static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg)
{
/* No need to do anything */
}
/*
* Kick all vcpus out in a synchronized way. When returned, we
* guarantee that every vcpu has been kicked and at least returned to
* userspace once.
*/
static void kvm_cpu_synchronize_kick_all(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL);
}
}
/*
* Flush all the existing dirty pages to the KVM slot buffers. When
* this call returns, we guarantee that all the touched dirty pages
* before calling this function have been put into the per-kvmslot
* dirty bitmap.
*
* This function must be called with BQL held.
*/
static void kvm_dirty_ring_flush(void)
{
trace_kvm_dirty_ring_flush(0);
/*
* The function needs to be serialized. Since this function
* should always be with BQL held, serialization is guaranteed.
* However, let's be sure of it.
*/
assert(qemu_mutex_iothread_locked());
/*
* First make sure to flush the hardware buffers by kicking all
* vcpus out in a synchronous way.
*/
kvm_cpu_synchronize_kick_all();
kvm_dirty_ring_reap(kvm_state);
trace_kvm_dirty_ring_flush(1);
}
/**
@ -617,53 +861,28 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem)
* @kml: the KVM memory listener object
* @section: the memory section to sync the dirty bitmap with
*/
static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
MemoryRegionSection *section)
static void kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
MemoryRegionSection *section)
{
KVMState *s = kvm_state;
struct kvm_dirty_log d = {};
KVMSlot *mem;
hwaddr start_addr, size;
hwaddr slot_size, slot_offset = 0;
int ret = 0;
hwaddr slot_size;
size = kvm_align_section(section, &start_addr);
while (size) {
MemoryRegionSection subsection = *section;
slot_size = MIN(kvm_max_slot_size, size);
mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
if (!mem) {
/* We don't have a slot if we want to trap every access. */
goto out;
return;
}
if (!mem->dirty_bmap) {
/* Allocate on the first log_sync, once and for all */
kvm_memslot_init_dirty_bitmap(mem);
if (kvm_slot_get_dirty_log(s, mem)) {
kvm_slot_sync_dirty_pages(mem);
}
d.dirty_bitmap = mem->dirty_bmap;
d.slot = mem->slot | (kml->as_id << 16);
ret = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d);
if (ret == -ENOENT) {
/* kernel does not have dirty bitmap in this slot */
ret = 0;
} else if (ret < 0) {
error_report("ioctl KVM_GET_DIRTY_LOG failed: %d", errno);
goto out;
} else {
subsection.offset_within_region += slot_offset;
subsection.size = int128_make64(slot_size);
kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap);
}
slot_offset += slot_size;
start_addr += slot_size;
size -= slot_size;
}
out:
return ret;
}
/* Alignment requirement for KVM_CLEAR_DIRTY_LOG - 64 pages */
@ -810,7 +1029,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml,
return ret;
}
kvm_slots_lock(kml);
kvm_slots_lock();
for (i = 0; i < s->nr_slots; i++) {
mem = &kml->slots[i];
@ -836,7 +1055,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml,
}
}
kvm_slots_unlock(kml);
kvm_slots_unlock();
return ret;
}
@ -1119,7 +1338,8 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
int err;
MemoryRegion *mr = section->mr;
bool writeable = !mr->readonly && !mr->rom_device;
hwaddr start_addr, size, slot_size;
hwaddr start_addr, size, slot_size, mr_offset;
ram_addr_t ram_start_offset;
void *ram;
if (!memory_region_is_ram(mr)) {
@ -1137,11 +1357,15 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
return;
}
/* use aligned delta to align the ram address */
ram = memory_region_get_ram_ptr(mr) + section->offset_within_region +
(start_addr - section->offset_within_address_space);
/* The offset of the kvmslot within the memory region */
mr_offset = section->offset_within_region + start_addr -
section->offset_within_address_space;
kvm_slots_lock(kml);
/* use aligned delta to align the ram address and offset */
ram = memory_region_get_ram_ptr(mr) + mr_offset;
ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset;
kvm_slots_lock();
if (!add) {
do {
@ -1151,7 +1375,25 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
goto out;
}
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
kvm_physical_sync_dirty_bitmap(kml, section);
/*
* NOTE: We should be aware of the fact that here we're only
* doing a best effort to sync dirty bits. No matter whether
* we're using dirty log or dirty ring, we ignored two facts:
*
* (1) dirty bits can reside in hardware buffers (PML)
*
* (2) after we collected dirty bits here, pages can be dirtied
* again before we do the final KVM_SET_USER_MEMORY_REGION to
* remove the slot.
*
* Not easy. Let's cross the fingers until it's fixed.
*/
if (kvm_state->kvm_dirty_ring_size) {
kvm_dirty_ring_reap_locked(kvm_state);
} else {
kvm_slot_get_dirty_log(kvm_state, mem);
}
kvm_slot_sync_dirty_pages(mem);
}
/* unregister the slot */
@ -1175,18 +1417,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
do {
slot_size = MIN(kvm_max_slot_size, size);
mem = kvm_alloc_slot(kml);
mem->as_id = kml->as_id;
mem->memory_size = slot_size;
mem->start_addr = start_addr;
mem->ram_start_offset = ram_start_offset;
mem->ram = ram;
mem->flags = kvm_mem_flags(mr);
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
* Reallocate the bmap; it means it doesn't disappear in
* middle of a migrate.
*/
kvm_memslot_init_dirty_bitmap(mem);
}
kvm_slot_init_dirty_bitmap(mem);
err = kvm_set_user_memory_region(kml, mem, true);
if (err) {
fprintf(stderr, "%s: error registering slot: %s\n", __func__,
@ -1194,12 +1431,58 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
abort();
}
start_addr += slot_size;
ram_start_offset += slot_size;
ram += slot_size;
size -= slot_size;
} while (size);
out:
kvm_slots_unlock(kml);
kvm_slots_unlock();
}
static void *kvm_dirty_ring_reaper_thread(void *data)
{
KVMState *s = data;
struct KVMDirtyRingReaper *r = &s->reaper;
rcu_register_thread();
trace_kvm_dirty_ring_reaper("init");
while (true) {
r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT;
trace_kvm_dirty_ring_reaper("wait");
/*
* TODO: provide a smarter timeout rather than a constant?
*/
sleep(1);
trace_kvm_dirty_ring_reaper("wakeup");
r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
qemu_mutex_lock_iothread();
kvm_dirty_ring_reap(s);
qemu_mutex_unlock_iothread();
r->reaper_iteration++;
}
trace_kvm_dirty_ring_reaper("exit");
rcu_unregister_thread();
return NULL;
}
static int kvm_dirty_ring_reaper_init(KVMState *s)
{
struct KVMDirtyRingReaper *r = &s->reaper;
qemu_thread_create(&r->reaper_thr, "kvm-reaper",
kvm_dirty_ring_reaper_thread,
s, QEMU_THREAD_JOINABLE);
return 0;
}
static void kvm_region_add(MemoryListener *listener,
@ -1224,14 +1507,40 @@ static void kvm_log_sync(MemoryListener *listener,
MemoryRegionSection *section)
{
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
int r;
kvm_slots_lock(kml);
r = kvm_physical_sync_dirty_bitmap(kml, section);
kvm_slots_unlock(kml);
if (r < 0) {
abort();
kvm_slots_lock();
kvm_physical_sync_dirty_bitmap(kml, section);
kvm_slots_unlock();
}
static void kvm_log_sync_global(MemoryListener *l)
{
KVMMemoryListener *kml = container_of(l, KVMMemoryListener, listener);
KVMState *s = kvm_state;
KVMSlot *mem;
int i;
/* Flush all kernel dirty addresses into KVMSlot dirty bitmap */
kvm_dirty_ring_flush();
/*
* TODO: make this faster when nr_slots is big while there are
* only a few used slots (small VMs).
*/
kvm_slots_lock();
for (i = 0; i < s->nr_slots; i++) {
mem = &kml->slots[i];
if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
kvm_slot_sync_dirty_pages(mem);
/*
* This is not needed by KVM_GET_DIRTY_LOG because the
* ioctl will unconditionally overwrite the whole region.
* However kvm dirty ring has no such side effect.
*/
kvm_slot_reset_dirty_pages(mem);
}
}
kvm_slots_unlock();
}
static void kvm_log_clear(MemoryListener *listener,
@ -1328,7 +1637,6 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
{
int i;
qemu_mutex_init(&kml->slots_lock);
kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
kml->as_id = as_id;
@ -1340,10 +1648,15 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
kml->listener.region_del = kvm_region_del;
kml->listener.log_start = kvm_log_start;
kml->listener.log_stop = kvm_log_stop;
kml->listener.log_sync = kvm_log_sync;
kml->listener.log_clear = kvm_log_clear;
kml->listener.priority = 10;
if (s->kvm_dirty_ring_size) {
kml->listener.log_sync_global = kvm_log_sync_global;
} else {
kml->listener.log_sync = kvm_log_sync;
kml->listener.log_clear = kvm_log_clear;
}
memory_listener_register(&kml->listener, as);
for (i = 0; i < s->nr_as; ++i) {
@ -2001,6 +2314,8 @@ static int kvm_init(MachineState *ms)
int type = 0;
uint64_t dirty_log_manual_caps;
qemu_mutex_init(&kml_slots_lock);
s = KVM_STATE(ms->accelerator);
/*
@ -2017,7 +2332,6 @@ static int kvm_init(MachineState *ms)
QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
QLIST_INIT(&s->kvm_parked_vcpus);
s->vmfd = -1;
s->fd = qemu_open_old("/dev/kvm", O_RDWR);
if (s->fd == -1) {
fprintf(stderr, "Could not access KVM kernel module: %m\n");
@ -2125,20 +2439,70 @@ static int kvm_init(MachineState *ms)
s->coalesced_pio = s->coalesced_mmio &&
kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
dirty_log_manual_caps =
kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
s->manual_dirty_log_protect = dirty_log_manual_caps;
if (dirty_log_manual_caps) {
ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0,
dirty_log_manual_caps);
if (ret) {
warn_report("Trying to enable capability %"PRIu64" of "
"KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. "
"Falling back to the legacy mode. ",
dirty_log_manual_caps);
s->manual_dirty_log_protect = 0;
/*
* Enable KVM dirty ring if supported, otherwise fall back to
* dirty logging mode
*/
if (s->kvm_dirty_ring_size > 0) {
uint64_t ring_bytes;
ring_bytes = s->kvm_dirty_ring_size * sizeof(struct kvm_dirty_gfn);
/* Read the max supported pages */
ret = kvm_vm_check_extension(s, KVM_CAP_DIRTY_LOG_RING);
if (ret > 0) {
if (ring_bytes > ret) {
error_report("KVM dirty ring size %" PRIu32 " too big "
"(maximum is %ld). Please use a smaller value.",
s->kvm_dirty_ring_size,
(long)ret / sizeof(struct kvm_dirty_gfn));
ret = -EINVAL;
goto err;
}
ret = kvm_vm_enable_cap(s, KVM_CAP_DIRTY_LOG_RING, 0, ring_bytes);
if (ret) {
error_report("Enabling of KVM dirty ring failed: %s. "
"Suggested mininum value is 1024.", strerror(-ret));
goto err;
}
s->kvm_dirty_ring_bytes = ring_bytes;
} else {
warn_report("KVM dirty ring not available, using bitmap method");
s->kvm_dirty_ring_size = 0;
}
}
/*
* KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is
* enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no
* page is wr-protected initially, which is against how kvm dirty ring is
* usage - kvm dirty ring requires all pages are wr-protected at the very
* beginning. Enabling this feature for dirty ring causes data corruption.
*
* TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log,
* we may expect a higher stall time when starting the migration. In the
* future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too:
* instead of clearing dirty bit, it can be a way to explicitly wr-protect
* guest pages.
*/
if (!s->kvm_dirty_ring_size) {
dirty_log_manual_caps =
kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
s->manual_dirty_log_protect = dirty_log_manual_caps;
if (dirty_log_manual_caps) {
ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0,
dirty_log_manual_caps);
if (ret) {
warn_report("Trying to enable capability %"PRIu64" of "
"KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. "
"Falling back to the legacy mode. ",
dirty_log_manual_caps);
s->manual_dirty_log_protect = 0;
}
}
}
@ -2224,6 +2588,14 @@ static int kvm_init(MachineState *ms)
ret = ram_block_discard_disable(true);
assert(!ret);
}
if (s->kvm_dirty_ring_size) {
ret = kvm_dirty_ring_reaper_init(s);
if (ret) {
goto err;
}
}
return 0;
err:
@ -2536,6 +2908,17 @@ int kvm_cpu_exec(CPUState *cpu)
case KVM_EXIT_INTERNAL_ERROR:
ret = kvm_handle_internal_error(cpu, run);
break;
case KVM_EXIT_DIRTY_RING_FULL:
/*
* We shouldn't continue if the dirty ring of this vcpu is
* still full. Got kicked by KVM_RESET_DIRTY_RINGS.
*/
trace_kvm_dirty_ring_full(cpu->cpu_index);
qemu_mutex_lock_iothread();
kvm_dirty_ring_reap(kvm_state);
qemu_mutex_unlock_iothread();
ret = 0;
break;
case KVM_EXIT_SYSTEM_EVENT:
switch (run->system_event.type) {
case KVM_SYSTEM_EVENT_SHUTDOWN:
@ -3112,6 +3495,11 @@ static void kvm_set_kvm_shadow_mem(Object *obj, Visitor *v,
KVMState *s = KVM_STATE(obj);
int64_t value;
if (s->fd != -1) {
error_setg(errp, "Cannot set properties after the accelerator has been initialized");
return;
}
if (!visit_type_int(v, name, &value, errp)) {
return;
}
@ -3126,6 +3514,11 @@ static void kvm_set_kernel_irqchip(Object *obj, Visitor *v,
KVMState *s = KVM_STATE(obj);
OnOffSplit mode;
if (s->fd != -1) {
error_setg(errp, "Cannot set properties after the accelerator has been initialized");
return;
}
if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
return;
}
@ -3168,13 +3561,53 @@ bool kvm_kernel_irqchip_split(void)
return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON;
}
static void kvm_get_dirty_ring_size(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
KVMState *s = KVM_STATE(obj);
uint32_t value = s->kvm_dirty_ring_size;
visit_type_uint32(v, name, &value, errp);
}
static void kvm_set_dirty_ring_size(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
KVMState *s = KVM_STATE(obj);
Error *error = NULL;
uint32_t value;
if (s->fd != -1) {
error_setg(errp, "Cannot set properties after the accelerator has been initialized");
return;
}
visit_type_uint32(v, name, &value, &error);
if (error) {
error_propagate(errp, error);
return;
}
if (value & (value - 1)) {
error_setg(errp, "dirty-ring-size must be a power of two.");
return;
}
s->kvm_dirty_ring_size = value;
}
static void kvm_accel_instance_init(Object *obj)
{
KVMState *s = KVM_STATE(obj);
s->fd = -1;
s->vmfd = -1;
s->kvm_shadow_mem = -1;
s->kernel_irqchip_allowed = true;
s->kernel_irqchip_split = ON_OFF_AUTO_AUTO;
/* KVM dirty ring is by default off */
s->kvm_dirty_ring_size = 0;
}
static void kvm_accel_class_init(ObjectClass *oc, void *data)
@ -3196,6 +3629,12 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data)
NULL, NULL);
object_class_property_set_description(oc, "kvm-shadow-mem",
"KVM shadow MMU size");
object_class_property_add(oc, "dirty-ring-size", "uint32",
kvm_get_dirty_ring_size, kvm_set_dirty_ring_size,
NULL, NULL);
object_class_property_set_description(oc, "dirty-ring-size",
"Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)");
}
static const TypeInfo kvm_accel_type = {

View file

@ -18,4 +18,11 @@ kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t
kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
kvm_resample_fd_notify(int gsi) "gsi %d"
kvm_dirty_ring_full(int id) "vcpu %d"
kvm_dirty_ring_reap_vcpu(int id) "vcpu %d"
kvm_dirty_ring_page(int vcpu, uint32_t slot, uint64_t offset) "vcpu %d fetch %"PRIu32" offset 0x%"PRIx64
kvm_dirty_ring_reaper(const char *s) "%s"
kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
kvm_dirty_ring_reaper_kick(const char *reason) "%s"
kvm_dirty_ring_flush(int finished) "%d"

View file

@ -22,7 +22,7 @@
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "replication.h"
#include "block/replication.h"
typedef enum {
BLOCK_REPLICATION_NONE, /* block replication is not started */

79
configure vendored
View file

@ -256,31 +256,11 @@ gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")
if test -e "$source_path/.git"
then
git_submodules_action="update"
git_submodules="ui/keycodemapdb"
git_submodules="$git_submodules tests/fp/berkeley-testfloat-3"
git_submodules="$git_submodules tests/fp/berkeley-softfloat-3"
else
git_submodules_action="ignore"
git_submodules=""
if ! test -f "$source_path/ui/keycodemapdb/README"
then
echo
echo "ERROR: missing file $source_path/ui/keycodemapdb/README"
echo
echo "This is not a GIT checkout but module content appears to"
echo "be missing. Do not use 'git archive' or GitHub download links"
echo "to acquire QEMU source archives. Non-GIT builds are only"
echo "supported with source archives linked from:"
echo
echo " https://www.qemu.org/download/#source"
echo
echo "Developers working with GIT can use scripts/archive-source.sh"
echo "if they need to create valid source archives."
echo
exit 1
fi
fi
git_submodules="ui/keycodemapdb"
git="git"
# Don't accept a target_list environment variable.
@ -1593,6 +1573,28 @@ case $git_submodules_action in
fi
;;
ignore)
if ! test -f "$source_path/ui/keycodemapdb/README"
then
echo
echo "ERROR: missing GIT submodules"
echo
if test -e "$source_path/.git"; then
echo "--with-git-submodules=ignore specified but submodules were not"
echo "checked out. Please initialize and update submodules."
else
echo "This is not a GIT checkout but module content appears to"
echo "be missing. Do not use 'git archive' or GitHub download links"
echo "to acquire QEMU source archives. Non-GIT builds are only"
echo "supported with source archives linked from:"
echo
echo " https://www.qemu.org/download/#source"
echo
echo "Developers working with GIT can use scripts/archive-source.sh"
echo "if they need to create valid source archives."
fi
echo
exit 1
fi
;;
*)
echo "ERROR: invalid --with-git-submodules= value '$git_submodules_action'"
@ -2281,6 +2283,11 @@ if test "$solaris" = "yes" ; then
fi
fi
if test "$tcg" = "enabled"; then
git_submodules="$git_submodules tests/fp/berkeley-testfloat-3"
git_submodules="$git_submodules tests/fp/berkeley-softfloat-3"
fi
if test -z "${target_list+xxx}" ; then
default_targets=yes
for target in $default_target_list; do
@ -3620,9 +3627,7 @@ fi
case "$fdt" in
auto | enabled | internal)
# Simpler to always update submodule, even if not needed.
if test "$git_submodules_action" != "ignore"; then
git_submodules="${git_submodules} dtc"
fi
git_submodules="${git_submodules} dtc"
;;
esac
@ -4344,9 +4349,7 @@ fi
case "$capstone" in
auto | enabled | internal)
# Simpler to always update submodule, even if not needed.
if test "$git_submodules_action" != "ignore"; then
git_submodules="${git_submodules} capstone"
fi
git_submodules="${git_submodules} capstone"
;;
esac
@ -5276,9 +5279,7 @@ fi
case "$slirp" in
auto | enabled | internal)
# Simpler to always update submodule, even if not needed.
if test "$git_submodules_action" != "ignore"; then
git_submodules="${git_submodules} slirp"
fi
git_submodules="${git_submodules} slirp"
;;
esac
@ -5470,9 +5471,7 @@ if test "$cpu" = "s390x" ; then
roms="$roms s390-ccw"
# SLOF is required for building the s390-ccw firmware on s390x,
# since it is using the libnet code from SLOF for network booting.
if test "$git_submodules_action" != "ignore"; then
git_submodules="${git_submodules} roms/SLOF"
fi
git_submodules="${git_submodules} roms/SLOF"
fi
fi
@ -6504,10 +6503,14 @@ fi
# Create list of config switches that should be poisoned in common code...
# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special.
sed -n -e '/CONFIG_TCG/d' -e '/CONFIG_USER_ONLY/d' \
-e '/^#define / { s///; s/ .*//; s/^/#pragma GCC poison /p; }' \
*-config-devices.h *-config-target.h | \
sort -u > config-poison.h
target_configs_h=$(ls *-config-devices.h *-config-target.h 2>/dev/null)
if test -n "$target_configs_h" ; then
sed -n -e '/CONFIG_TCG/d' -e '/CONFIG_USER_ONLY/d' \
-e '/^#define / { s///; s/ .*//; s/^/#pragma GCC poison /p; }' \
$target_configs_h | sort -u > config-poison.h
else
:> config-poison.h
fi
# Save the configure command line for later reuse.
cat <<EOD >config.status

View file

@ -8,4 +8,3 @@ CONFIG_POWERNV=y
# For pSeries
CONFIG_PSERIES=y
CONFIG_NVDIMM=y

View file

@ -1297,6 +1297,7 @@ ERST
.help = "create QOM object",
.cmd = hmp_object_add,
.command_completion = object_add_completion,
.flags = "p",
},
SRST
@ -1311,6 +1312,7 @@ ERST
.help = "destroy QOM object",
.cmd = hmp_object_del,
.command_completion = object_del_completion,
.flags = "p",
},
SRST

View file

@ -6,6 +6,7 @@ config ARM_VIRT
imply VFIO_PLATFORM
imply VFIO_XGMAC
imply TPM_TIS_SYSBUS
imply NVDIMM
select ARM_GIC
select ACPI
select ARM_SMMUV3

View file

@ -23,6 +23,7 @@ config PC
imply TPM_TIS_ISA
imply VGA_PCI
imply VIRTIO_VGA
imply NVDIMM
select FDC
select I8259
select I8254

View file

@ -7,6 +7,4 @@ config MEM_DEVICE
config NVDIMM
bool
default y
depends on (PC || PSERIES || ARM_VIRT)
select MEM_DEVICE

View file

@ -3,6 +3,7 @@ config PSERIES
imply PCI_DEVICES
imply TEST_DEVICES
imply VIRTIO_VGA
imply NVDIMM
select DIMM
select PCI
select SPAPR_VSCSI

View file

@ -1582,6 +1582,7 @@ invalid_field:
scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
}
/* sector_num and nb_sectors expected to be in qdev blocksize */
static inline bool check_lba_range(SCSIDiskState *s,
uint64_t sector_num, uint32_t nb_sectors)
{
@ -1614,11 +1615,12 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
assert(r->req.aiocb == NULL);
if (data->count > 0) {
r->sector = ldq_be_p(&data->inbuf[0])
* (s->qdev.blocksize / BDRV_SECTOR_SIZE);
r->sector_count = (ldl_be_p(&data->inbuf[8]) & 0xffffffffULL)
* (s->qdev.blocksize / BDRV_SECTOR_SIZE);
if (!check_lba_range(s, r->sector, r->sector_count)) {
uint64_t sector_num = ldq_be_p(&data->inbuf[0]);
uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE);
if (!check_lba_range(s, sector_num, nb_sectors)) {
block_acct_invalid(blk_get_stats(s->qdev.conf.blk),
BLOCK_ACCT_UNMAP);
scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));

View file

@ -23,7 +23,7 @@ typedef struct ReplicationOps ReplicationOps;
typedef struct ReplicationState ReplicationState;
/**
* SECTION:replication.h
* SECTION:block/replication.h
* @title:Base Replication System
* @short_description: interfaces for handling replication
*
@ -32,7 +32,7 @@ typedef struct ReplicationState ReplicationState;
* <example>
* <title>How to use replication interfaces</title>
* <programlisting>
* #include "replication.h"
* #include "block/replication.h"
*
* typedef struct BDRVReplicationState {
* ReplicationState *rs;

View file

@ -616,6 +616,18 @@ struct MemoryListener {
*/
void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section);
/**
* @log_sync_global:
*
* This is the global version of @log_sync when the listener does
* not have a way to synchronize the log with finer granularity.
* When the listener registers with @log_sync_global defined, then
* its @log_sync must be NULL. Vice versa.
*
* @listener: The #MemoryListener.
*/
void (*log_sync_global)(MemoryListener *listener);
/**
* @log_clear:
*

View file

@ -329,6 +329,10 @@ struct qemu_work_item;
* @ignore_memory_transaction_failures: Cached copy of the MachineState
* flag of the same name: allows the board to suppress calling of the
* CPU do_transaction_failed hook function.
* @kvm_dirty_gfns: Points to the KVM dirty ring for this CPU when KVM dirty
* ring is enabled.
* @kvm_fetch_index: Keeps the index that we last fetched from the per-vCPU
* dirty ring structure.
*
* State of one CPU core or thread.
*/
@ -400,9 +404,12 @@ struct CPUState {
*/
uintptr_t mem_io_pc;
/* Only used in KVM */
int kvm_fd;
struct KVMState *kvm_state;
struct kvm_run *kvm_run;
struct kvm_dirty_gfn *kvm_dirty_gfns;
uint32_t kvm_fetch_index;
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);

View file

@ -1,7 +1,7 @@
#ifndef QEMU_CONFIG_FILE_H
#define QEMU_CONFIG_FILE_H
void qemu_load_module_for_opts(const char *group);
QemuOptsList *qemu_find_opts(const char *group);
QemuOptsList *qemu_find_opts_err(const char *group, Error **errp);
QemuOpts *qemu_find_opts_singleton(const char *group);

View file

@ -29,8 +29,13 @@
#define QEMU_OPTIONS_H
enum {
#define QEMU_OPTIONS_GENERATE_ENUM
#include "qemu-options-wrapper.h"
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
opt_enum,
#define DEFHEADING(text)
#define ARCHHEADING(text, arch_mask)
#include "qemu-options.def"
};
#endif

View file

@ -23,12 +23,15 @@ typedef struct KVMSlot
int old_flags;
/* Dirty bitmap cache for the slot */
unsigned long *dirty_bmap;
unsigned long dirty_bmap_size;
/* Cache of the address space ID */
int as_id;
/* Cache of the offset in ram address space */
ram_addr_t ram_start_offset;
} KVMSlot;
typedef struct KVMMemoryListener {
MemoryListener listener;
/* Protects the slots and all inside them */
QemuMutex slots_lock;
KVMSlot *slots;
int as_id;
} KVMMemoryListener;

View file

@ -2145,6 +2145,7 @@ common_all = common_ss.apply(config_all, strict: false)
common_all = static_library('common',
build_by_default: false,
sources: common_all.sources() + genh,
implicit_include_directories: false,
dependencies: common_all.dependencies(),
name_suffix: 'fa')

View file

@ -28,7 +28,7 @@
#include "migration/failover.h"
#include "migration/ram.h"
#ifdef CONFIG_REPLICATION
#include "replication.h"
#include "block/replication.h"
#endif
#include "net/colo-compare.h"
#include "net/colo.h"

View file

@ -32,7 +32,7 @@
#include "qemu-common.h"
/* Needed early for CONFIG_BSD etc. */
#include "net/slirp.h"
#include "qemu-options.h"
#include "qemu/qemu-options.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "sysemu/runstate.h"

View file

@ -27,7 +27,6 @@
#include <windows.h>
#include <mmsystem.h>
#include "qemu-common.h"
#include "qemu-options.h"
#include "sysemu/runstate.h"
static BOOL WINAPI qemu_ctrl_handler(DWORD type)

View file

@ -644,6 +644,21 @@
{ 'struct': 'PrManagerHelperProperties',
'data': { 'path': 'str' } }
##
# @QtestProperties:
#
# Properties for qtest objects.
#
# @chardev: the chardev to be used to receive qtest commands on.
#
# @log: the path to a log file
#
# Since: 6.0
##
{ 'struct': 'QtestProperties',
'data': { 'chardev': 'str',
'*log': 'str' } }
##
# @RemoteObjectProperties:
#
@ -769,6 +784,7 @@
'memory-backend-ram',
'pef-guest',
'pr-manager-helper',
'qtest',
'rng-builtin',
'rng-egd',
'rng-random',
@ -825,6 +841,7 @@
'if': 'defined(CONFIG_LINUX)' },
'memory-backend-ram': 'MemoryBackendProperties',
'pr-manager-helper': 'PrManagerHelperProperties',
'qtest': 'QtestProperties',
'rng-builtin': 'RngProperties',
'rng-egd': 'RngEgdProperties',
'rng-random': 'RngRandomProperties',
@ -857,7 +874,8 @@
# <- { "return": {} }
#
##
{ 'command': 'object-add', 'data': 'ObjectOptions', 'boxed': true }
{ 'command': 'object-add', 'data': 'ObjectOptions', 'boxed': true,
'allow-preconfig': true }
##
# @object-del:
@ -877,4 +895,5 @@
# <- { "return": {} }
#
##
{ 'command': 'object-del', 'data': {'id': 'str'} }
{ 'command': 'object-del', 'data': {'id': 'str'},
'allow-preconfig': true }

View file

@ -1,40 +0,0 @@
#if defined(QEMU_OPTIONS_GENERATE_ENUM)
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
opt_enum,
#define DEFHEADING(text)
#define ARCHHEADING(text, arch_mask)
#elif defined(QEMU_OPTIONS_GENERATE_HELP)
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
if ((arch_mask) & arch_type) \
fputs(opt_help, stdout);
#define ARCHHEADING(text, arch_mask) \
if ((arch_mask) & arch_type) \
puts(stringify(text));
#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
#elif defined(QEMU_OPTIONS_GENERATE_OPTIONS)
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
{ option, opt_arg, opt_enum, arch_mask },
#define DEFHEADING(text)
#define ARCHHEADING(text, arch_mask)
#else
#error "qemu-options-wrapper.h included with no option defined"
#endif
#include "qemu-options.def"
#undef DEF
#undef DEFHEADING
#undef ARCHHEADING
#undef QEMU_OPTIONS_GENERATE_ENUM
#undef QEMU_OPTIONS_GENERATE_HELP
#undef QEMU_OPTIONS_GENERATE_OPTIONS

View file

@ -141,6 +141,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
" kvm-shadow-mem=size of KVM shadow MMU in bytes\n"
" split-wx=on|off (enable TCG split w^x mapping)\n"
" tb-size=n (TCG translation block cache size)\n"
" dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
" thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
SRST
``-accel name[,prop=value[,...]]``
@ -181,6 +182,17 @@ SRST
where both the back-end and front-ends support it and no
incompatible TCG features have been enabled (e.g.
icount/replay).
``dirty-ring-size=n``
When the KVM accelerator is used, it controls the size of the per-vCPU
dirty page ring buffer (number of entries for each vCPU). It should
be a value that is power of two, and it should be 1024 or bigger (but
still less than the maximum value that the kernel supports). 4096
could be a good initial value if you have no idea which is the best.
Set this value to 0 to disable the feature. By default, this feature
is disabled (dirty-ring-size=0). When enabled, KVM will instead
record dirty pages in a bitmap.
ERST
DEF("smp", HAS_ARG, QEMU_OPTION_smp,
@ -3787,8 +3799,11 @@ DEF("mon", HAS_ARG, QEMU_OPTION_mon, \
"-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]\n", QEMU_ARCH_ALL)
SRST
``-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]``
Setup monitor on chardev name. ``pretty`` is only valid when
``mode=control``, turning on JSON pretty printing to ease
Setup monitor on chardev name. ``mode=control`` configures
a QMP monitor (a JSON RPC-style protocol) and it is not the
same as HMP, the human monitor that has a "(qemu)" prompt.
``pretty`` is only valid when ``mode=control``,
turning on JSON pretty printing to ease
human reading and debugging.
ERST
@ -5264,3 +5279,7 @@ ERST
HXCOMM This is the last statement. Insert new options before this line!
#undef DEF
#undef DEFHEADING
#undef ARCHHEADING

View file

@ -14,7 +14,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "replication.h"
#include "block/replication.h"
static QLIST_HEAD(, ReplicationState) replication_states;

View file

@ -2055,6 +2055,10 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
memory_region_get_dirty_log_mask(mr));
}
/*
* If memory region `mr' is NULL, do global sync. Otherwise, sync
* dirty bitmap for the specified memory region.
*/
static void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
{
MemoryListener *listener;
@ -2068,18 +2072,24 @@ static void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
* address space once.
*/
QTAILQ_FOREACH(listener, &memory_listeners, link) {
if (!listener->log_sync) {
continue;
}
as = listener->address_space;
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->dirty_log_mask && (!mr || fr->mr == mr)) {
MemoryRegionSection mrs = section_from_flat_range(fr, view);
listener->log_sync(listener, &mrs);
if (listener->log_sync) {
as = listener->address_space;
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->dirty_log_mask && (!mr || fr->mr == mr)) {
MemoryRegionSection mrs = section_from_flat_range(fr, view);
listener->log_sync(listener, &mrs);
}
}
flatview_unref(view);
} else if (listener->log_sync_global) {
/*
* No matter whether MR is specified, what we can do here
* is to do a global sync, because we are not capable to
* sync in a finer granularity.
*/
listener->log_sync_global(listener);
}
flatview_unref(view);
}
}
@ -2767,6 +2777,9 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)
{
MemoryListener *other = NULL;
/* Only one of them can be defined for a listener */
assert(!(listener->log_sync && listener->log_sync_global));
listener->address_space = as;
if (QTAILQ_EMPTY(&memory_listeners)
|| listener->priority >= QTAILQ_LAST(&memory_listeners)->priority) {

View file

@ -27,6 +27,8 @@
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/cutils.h"
#include "qapi/qmp/qerror.h"
#include "qom/object_interfaces.h"
#include CONFIG_DEVICES
#ifdef CONFIG_PSERIES
#include "hw/ppc/spapr_rtas.h"
@ -34,11 +36,25 @@
#define MAX_IRQ 256
#define TYPE_QTEST "qtest"
OBJECT_DECLARE_SIMPLE_TYPE(QTest, QTEST)
struct QTest {
Object parent;
bool has_machine_link;
char *chr_name;
Chardev *chr;
CharBackend qtest_chr;
char *log;
};
bool qtest_allowed;
static DeviceState *irq_intercept_dev;
static FILE *qtest_log_fp;
static CharBackend qtest_chr;
static QTest *qtest;
static GString *inbuf;
static int irq_levels[MAX_IRQ];
static qemu_timeval start_time;
@ -320,7 +336,7 @@ static void qtest_irq_handler(void *opaque, int n, int level)
qemu_set_irq(old_irq, level);
if (irq_levels[n] != level) {
CharBackend *chr = &qtest_chr;
CharBackend *chr = &qtest->qtest_chr;
irq_levels[n] = level;
qtest_send_prefix(chr);
qtest_sendf(chr, "IRQ %s %d\n",
@ -849,18 +865,39 @@ static void qtest_event(void *opaque, QEMUChrEvent event)
break;
}
}
void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp)
{
ERRP_GUARD();
Chardev *chr;
Object *qtest;
chr = qemu_chr_new("qtest", qtest_chrdev, NULL);
if (chr == NULL) {
error_setg(errp, "Failed to initialize device for qtest: \"%s\"",
qtest_chrdev);
return;
}
qtest = object_new(TYPE_QTEST);
object_property_set_str(qtest, "chardev", "qtest", &error_abort);
if (qtest_log) {
object_property_set_str(qtest, "log", qtest_log, &error_abort);
}
object_property_add_child(qdev_get_machine(), "qtest", qtest);
user_creatable_complete(USER_CREATABLE(qtest), errp);
if (*errp) {
object_unparent(qtest);
}
object_unref(OBJECT(chr));
object_unref(qtest);
}
static bool qtest_server_start(QTest *q, Error **errp)
{
Chardev *chr = q->chr;
const char *qtest_log = q->log;
if (qtest_log) {
if (strcmp(qtest_log, "none") != 0) {
qtest_log_fp = fopen(qtest_log, "w+");
@ -869,16 +906,20 @@ void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **
qtest_log_fp = stderr;
}
qemu_chr_fe_init(&qtest_chr, chr, errp);
qemu_chr_fe_set_handlers(&qtest_chr, qtest_can_read, qtest_read,
qtest_event, NULL, &qtest_chr, NULL, true);
qemu_chr_fe_set_echo(&qtest_chr, true);
if (!qemu_chr_fe_init(&q->qtest_chr, chr, errp)) {
return false;
}
qemu_chr_fe_set_handlers(&q->qtest_chr, qtest_can_read, qtest_read,
qtest_event, NULL, &q->qtest_chr, NULL, true);
qemu_chr_fe_set_echo(&q->qtest_chr, true);
inbuf = g_string_new("");
if (!qtest_server_send) {
qtest_server_set_send_handler(qtest_server_char_be_send, &qtest_chr);
qtest_server_set_send_handler(qtest_server_char_be_send, &q->qtest_chr);
}
qtest = q;
return true;
}
void qtest_server_set_send_handler(void (*send)(void*, const char*),
@ -890,7 +931,7 @@ void qtest_server_set_send_handler(void (*send)(void*, const char*),
bool qtest_driver(void)
{
return qtest_chr.chr != NULL;
return qtest && qtest->qtest_chr.chr != NULL;
}
void qtest_server_inproc_recv(void *dummy, const char *buf)
@ -905,3 +946,129 @@ void qtest_server_inproc_recv(void *dummy, const char *buf)
g_string_truncate(gstr, 0);
}
}
static void qtest_complete(UserCreatable *uc, Error **errp)
{
QTest *q = QTEST(uc);
if (qtest) {
error_setg(errp, "Only one instance of qtest can be created");
return;
}
if (!q->chr_name) {
error_setg(errp, "No backend specified");
return;
}
if (OBJECT(uc)->parent != qdev_get_machine()) {
q->has_machine_link = true;
object_property_add_const_link(qdev_get_machine(), "qtest", OBJECT(uc));
} else {
/* -qtest was used. */
}
qtest_server_start(q, errp);
}
static void qtest_unparent(Object *obj)
{
QTest *q = QTEST(obj);
if (qtest == q) {
qemu_chr_fe_disconnect(&q->qtest_chr);
assert(!qtest_opened);
qemu_chr_fe_deinit(&q->qtest_chr, false);
if (qtest_log_fp) {
fclose(qtest_log_fp);
qtest_log_fp = NULL;
}
qtest = NULL;
}
if (q->has_machine_link) {
object_property_del(qdev_get_machine(), "qtest");
q->has_machine_link = false;
}
}
static void qtest_set_log(Object *obj, const char *value, Error **errp)
{
QTest *q = QTEST(obj);
if (qtest == q) {
error_setg(errp, QERR_PERMISSION_DENIED);
} else {
g_free(q->log);
q->log = g_strdup(value);
}
}
static char *qtest_get_log(Object *obj, Error **errp)
{
QTest *q = QTEST(obj);
return g_strdup(q->log);
}
static void qtest_set_chardev(Object *obj, const char *value, Error **errp)
{
QTest *q = QTEST(obj);
Chardev *chr;
if (qtest == q) {
error_setg(errp, QERR_PERMISSION_DENIED);
return;
}
chr = qemu_chr_find(value);
if (!chr) {
error_setg(errp, "Cannot find character device '%s'", value);
return;
}
g_free(q->chr_name);
q->chr_name = g_strdup(value);
if (q->chr) {
object_unref(q->chr);
}
q->chr = chr;
object_ref(chr);
}
static char *qtest_get_chardev(Object *obj, Error **errp)
{
QTest *q = QTEST(obj);
return g_strdup(q->chr_name);
}
static void qtest_class_init(ObjectClass *oc, void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
oc->unparent = qtest_unparent;
ucc->complete = qtest_complete;
object_class_property_add_str(oc, "chardev",
qtest_get_chardev, qtest_set_chardev);
object_class_property_add_str(oc, "log",
qtest_get_log, qtest_set_log);
}
static const TypeInfo qtest_info = {
.name = TYPE_QTEST,
.parent = TYPE_OBJECT,
.class_init = qtest_class_init,
.instance_size = sizeof(QTest),
.interfaces = (InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
};
static void register_types(void)
{
type_register_static(&qtest_info);
}
type_init(register_types);

View file

@ -88,7 +88,7 @@
#include "qapi/qobject-input-visitor.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qemu-options.h"
#include "qemu/qemu-options.h"
#include "qemu/main-loop.h"
#ifdef CONFIG_VIRTFS
#include "fsdev/qemu-fsdev.h"
@ -854,8 +854,17 @@ static void help(int exitcode)
"'disk_image' is a raw hard disk image for IDE hard disk 0\n\n",
error_get_progname());
#define QEMU_OPTIONS_GENERATE_HELP
#include "qemu-options-wrapper.h"
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
if ((arch_mask) & arch_type) \
fputs(opt_help, stdout);
#define ARCHHEADING(text, arch_mask) \
if ((arch_mask) & arch_type) \
puts(stringify(text));
#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
#include "qemu-options.def"
printf("\nDuring emulation, the following keys are useful:\n"
"ctrl-alt-f toggle full screen\n"
@ -880,8 +889,13 @@ typedef struct QEMUOption {
static const QEMUOption qemu_options[] = {
{ "h", 0, QEMU_OPTION_h, QEMU_ARCH_ALL },
#define QEMU_OPTIONS_GENERATE_OPTIONS
#include "qemu-options-wrapper.h"
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
{ option, opt_arg, opt_enum, arch_mask },
#define DEFHEADING(text)
#define ARCHHEADING(text, arch_mask)
#include "qemu-options.def"
{ NULL },
};
@ -1758,8 +1772,9 @@ static bool object_create_early(const char *type)
* add one, state the reason in a comment!
*/
/* Reason: rng-egd property "chardev" */
if (g_str_equal(type, "rng-egd")) {
/* Reason: property "chardev" */
if (g_str_equal(type, "rng-egd") ||
g_str_equal(type, "qtest")) {
return false;
}
@ -2024,8 +2039,6 @@ static void set_memory_options(MachineClass *mc)
exit(EXIT_FAILURE);
}
/* store value for the future use */
qemu_opt_set_number(opts, "size", ram_size, &error_abort);
maxram_size = ram_size;
if (qemu_opt_get(opts, "maxmem")) {
@ -2614,6 +2627,23 @@ void qmp_x_exit_preconfig(Error **errp)
}
}
#ifdef CONFIG_MODULES
void qemu_load_module_for_opts(const char *group)
{
static bool spice_tried;
if (g_str_equal(group, "spice") && !spice_tried) {
ui_module_load_one("spice-core");
spice_tried = true;
}
static bool iscsi_tried;
if (g_str_equal(group, "iscsi") && !iscsi_tried) {
block_module_load_one("iscsi");
iscsi_tried = true;
}
}
#endif
void qemu_init(int argc, char **argv, char **envp)
{
QemuOpts *opts;
@ -3373,10 +3403,6 @@ void qemu_init(int argc, char **argv, char **envp)
break;
case QEMU_OPTION_spice:
olist = qemu_find_opts_err("spice", NULL);
if (!olist) {
ui_module_load_one("spice-core");
olist = qemu_find_opts("spice");
}
if (!olist) {
error_report("spice support is disabled");
exit(1);

View file

@ -22,6 +22,7 @@ stub_ss.add(files('isa-bus.c'))
stub_ss.add(files('is-daemonized.c'))
stub_ss.add(when: 'CONFIG_LINUX_AIO', if_true: files('linux-aio.c'))
stub_ss.add(files('migr-blocker.c'))
stub_ss.add(files('module-opts.c'))
stub_ss.add(files('monitor.c'))
stub_ss.add(files('monitor-core.c'))
stub_ss.add(files('pci-bus.c'))

6
stubs/module-opts.c Normal file
View file

@ -0,0 +1,6 @@
#include "qemu/osdep.h"
#include "qemu/config-file.h"
void qemu_load_module_for_opts(const char *group)
{
}

View file

@ -940,7 +940,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
NULL, NULL, NULL, NULL,
NULL, "avx512-bf16", NULL, NULL,
"avx-vnni", "avx512-bf16", NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
@ -3194,7 +3194,7 @@ static X86CPUDefinition builtin_x86_defs[] = {
MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
.features[FEAT_7_1_EAX] =
CPUID_7_1_EAX_AVX512_BF16,
CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
/*
* Missing: XSAVES (not supported by some Linux versions,
* including v4.1 to v4.12).

View file

@ -817,6 +817,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
/* Speculative Store Bypass Disable */
#define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31)
/* AVX VNNI Instruction */
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)

View file

@ -16,6 +16,7 @@
#include <wordexp.h>
#include "qemu/datadir.h"
#include "sysemu/sysemu.h"
#include "sysemu/qtest.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"

View file

@ -14,7 +14,7 @@
#include "qapi/qmp/qdict.h"
#include "qemu/option.h"
#include "qemu/main-loop.h"
#include "replication.h"
#include "block/replication.h"
#include "block/block_int.h"
#include "block/qdict.h"
#include "sysemu/block-backend.h"

View file

@ -16,6 +16,7 @@ static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
{
int i;
qemu_load_module_for_opts(group);
for (i = 0; lists[i] != NULL; i++) {
if (strcmp(lists[i]->name, group) == 0)
break;