From 0ff1f9f585712684dc46cb6cb6a46a8cc160b2ae Mon Sep 17 00:00:00 2001 From: Orit Wasserman Date: Tue, 19 Jun 2012 11:51:37 +0300 Subject: [PATCH 01/13] Add missing check for host_from_stream_offset return value for RAM_SAVE_FLAG_PAGE Signed-off-by: Orit Wasserman --- arch_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch_init.c b/arch_init.c index a9e8b7442b..81c2e5443f 100644 --- a/arch_init.c +++ b/arch_init.c @@ -483,6 +483,9 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) void *host; host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } qemu_get_buffer(f, host, TARGET_PAGE_SIZE); } From 6607ae235bbc8c8c23159c21f545e7622489a236 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 19 Jun 2012 18:43:09 +0300 Subject: [PATCH 02/13] Add MigrationParams structure Signed-off-by: Isaku Yamahata --- block-migration.c | 8 ++++---- migration.c | 13 ++++++++----- migration.h | 8 ++++++-- qemu-common.h | 1 + savevm.c | 13 +++++++++---- sysemu.h | 3 ++- vmstate.h | 2 +- 7 files changed, 31 insertions(+), 17 deletions(-) diff --git a/block-migration.c b/block-migration.c index fd2ffff0d5..b95b4e1389 100644 --- a/block-migration.c +++ b/block-migration.c @@ -700,13 +700,13 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) return 0; } -static void block_set_params(int blk_enable, int shared_base, void *opaque) +static void block_set_params(const MigrationParams *params, void *opaque) { - block_mig_state.blk_enable = blk_enable; - block_mig_state.shared_base = shared_base; + block_mig_state.blk_enable = params->blk; + block_mig_state.shared_base = params->shared; /* shared base means that blk_enable = 1 */ - block_mig_state.blk_enable |= shared_base; + block_mig_state.blk_enable |= params->shared; } void blk_mig_init(void) diff --git a/migration.c b/migration.c index 3f485d33a5..810727fb2f 100644 --- a/migration.c +++ b/migration.c @@ -352,7 +352,7 @@ void migrate_fd_connect(MigrationState *s) 
migrate_fd_close); DPRINTF("beginning savevm\n"); - ret = qemu_savevm_state_begin(s->file, s->blk, s->shared); + ret = qemu_savevm_state_begin(s->file, &s->params); if (ret < 0) { DPRINTF("failed, %d\n", ret); migrate_fd_error(s); @@ -361,15 +361,14 @@ void migrate_fd_connect(MigrationState *s) migrate_fd_put_ready(s); } -static MigrationState *migrate_init(int blk, int inc) +static MigrationState *migrate_init(const MigrationParams *params) { MigrationState *s = migrate_get_current(); int64_t bandwidth_limit = s->bandwidth_limit; memset(s, 0, sizeof(*s)); s->bandwidth_limit = bandwidth_limit; - s->blk = blk; - s->shared = inc; + s->params = *params; s->bandwidth_limit = bandwidth_limit; s->state = MIG_STATE_SETUP; @@ -394,9 +393,13 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, Error **errp) { MigrationState *s = migrate_get_current(); + MigrationParams params; const char *p; int ret; + params.blk = blk; + params.shared = inc; + if (s->state == MIG_STATE_ACTIVE) { error_set(errp, QERR_MIGRATION_ACTIVE); return; @@ -411,7 +414,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, return; } - s = migrate_init(blk, inc); + s = migrate_init(&params); if (strstart(uri, "tcp:", &p)) { ret = tcp_start_outgoing_migration(s, p, errp); diff --git a/migration.h b/migration.h index 2e9ca2edf2..35207bd53e 100644 --- a/migration.h +++ b/migration.h @@ -19,6 +19,11 @@ #include "notify.h" #include "error.h" +struct MigrationParams { + bool blk; + bool shared; +}; + typedef struct MigrationState MigrationState; struct MigrationState @@ -31,8 +36,7 @@ struct MigrationState int (*close)(MigrationState *s); int (*write)(MigrationState *s, const void *buff, size_t size); void *opaque; - int blk; - int shared; + MigrationParams params; }; void process_incoming_migration(QEMUFile *f); diff --git a/qemu-common.h b/qemu-common.h index 9d9e603c6e..c8c6b2ae35 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -17,6 +17,7 @@ typedef struct DeviceState DeviceState; struct 
Monitor; typedef struct Monitor Monitor; +typedef struct MigrationParams MigrationParams; /* we put basic includes here to avoid repeating them in device drivers */ #include diff --git a/savevm.c b/savevm.c index faa81457d5..d1d90200e1 100644 --- a/savevm.c +++ b/savevm.c @@ -1561,7 +1561,8 @@ bool qemu_savevm_state_blocked(Error **errp) return false; } -int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared) +int qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params) { SaveStateEntry *se; int ret; @@ -1569,8 +1570,8 @@ int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared) QTAILQ_FOREACH(se, &savevm_handlers, entry) { if(se->set_params == NULL) { continue; - } - se->set_params(blk_enable, shared, se->opaque); + } + se->set_params(params, se->opaque); } qemu_put_be32(f, QEMU_VM_FILE_MAGIC); @@ -1708,13 +1709,17 @@ void qemu_savevm_state_cancel(QEMUFile *f) static int qemu_savevm_state(QEMUFile *f) { int ret; + MigrationParams params = { + .blk = 0, + .shared = 0 + }; if (qemu_savevm_state_blocked(NULL)) { ret = -EINVAL; goto out; } - ret = qemu_savevm_state_begin(f, 0, 0); + ret = qemu_savevm_state_begin(f, &params); if (ret < 0) goto out; diff --git a/sysemu.h b/sysemu.h index bc2c788921..6540c7912f 100644 --- a/sysemu.h +++ b/sysemu.h @@ -77,7 +77,8 @@ void do_info_snapshots(Monitor *mon); void qemu_announce_self(void); bool qemu_savevm_state_blocked(Error **errp); -int qemu_savevm_state_begin(QEMUFile *f, int blk_enable, int shared); +int qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params); int qemu_savevm_state_iterate(QEMUFile *f); int qemu_savevm_state_complete(QEMUFile *f); void qemu_savevm_state_cancel(QEMUFile *f); diff --git a/vmstate.h b/vmstate.h index 82d97aead4..5af45e0c12 100644 --- a/vmstate.h +++ b/vmstate.h @@ -26,7 +26,7 @@ #ifndef QEMU_VMSTATE_H #define QEMU_VMSTATE_H 1 -typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque); +typedef void SaveSetParamsHandler(const 
MigrationParams *params, void * opaque); typedef void SaveStateHandler(QEMUFile *f, void *opaque); typedef int SaveLiveStateHandler(QEMUFile *f, int stage, void *opaque); typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); From 0c51f43d7a82fedca550f9dc149edff2952b8fe1 Mon Sep 17 00:00:00 2001 From: Orit Wasserman Date: Tue, 19 Jun 2012 18:43:14 +0300 Subject: [PATCH 03/13] Add save_block_hdr function Signed-off-by: Benoit Hudzia Signed-off-by: Petter Svard Signed-off-by: Aidan Shribman Signed-off-by: Orit Wasserman --- arch_init.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch_init.c b/arch_init.c index 81c2e5443f..cfa1af2fd6 100644 --- a/arch_init.c +++ b/arch_init.c @@ -161,6 +161,18 @@ static int is_dup_page(uint8_t *page) return 1; } +static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, + int cont, int flag) +{ + qemu_put_be64(f, offset | cont | flag); + if (!cont) { + qemu_put_byte(f, strlen(block->idstr)); + qemu_put_buffer(f, (uint8_t *)block->idstr, + strlen(block->idstr)); + } + +} + static RAMBlock *last_block; static ram_addr_t last_offset; @@ -187,21 +199,11 @@ static int ram_save_block(QEMUFile *f) p = memory_region_get_ram_ptr(mr) + offset; if (is_dup_page(p)) { - qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS); - if (!cont) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, - strlen(block->idstr)); - } + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, *p); bytes_sent = 1; } else { - qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE); - if (!cont) { - qemu_put_byte(f, strlen(block->idstr)); - qemu_put_buffer(f, (uint8_t *)block->idstr, - strlen(block->idstr)); - } + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); qemu_put_buffer(f, p, TARGET_PAGE_SIZE); bytes_sent = TARGET_PAGE_SIZE; } From 3a697f697b611923d04598105859103d8b27a933 Mon Sep 17 00:00:00 2001 From: 
Orit Wasserman Date: Tue, 19 Jun 2012 18:43:15 +0300 Subject: [PATCH 04/13] Add debugging infrastructure Signed-off-by: Orit Wasserman --- arch_init.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/arch_init.c b/arch_init.c index cfa1af2fd6..78b3c0c100 100644 --- a/arch_init.c +++ b/arch_init.c @@ -44,6 +44,14 @@ #include "exec-memory.h" #include "hw/pcspk.h" +#ifdef DEBUG_ARCH_INIT +#define DPRINTF(fmt, ...) \ + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + #ifdef TARGET_SPARC int graphic_width = 1024; int graphic_height = 768; @@ -380,6 +388,9 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; + DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n", + expected_time, migrate_max_downtime()); + return (stage == 2) && (expected_time <= migrate_max_downtime()); } @@ -416,8 +427,11 @@ static inline void *host_from_stream_offset(QEMUFile *f, int ram_load(QEMUFile *f, void *opaque, int version_id) { ram_addr_t addr; - int flags; + int flags, ret = 0; int error; + static uint64_t seq_iter; + + seq_iter++; if (version_id < 4 || version_id > 4) { return -EINVAL; @@ -447,8 +461,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) QLIST_FOREACH(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { - if (block->length != length) - return -EINVAL; + if (block->length != length) { + ret = -EINVAL; + goto done; + } break; } } @@ -456,7 +472,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) if (!block) { fprintf(stderr, "Unknown ramblock \"%s\", cannot " "accept migration\n", id); - return -EINVAL; + ret = -EINVAL; + goto done; } total_ram_bytes -= length; @@ -493,11 +510,15 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) } error = qemu_file_get_error(f); if (error) { - return error; + ret = error; 
+ goto done; } } while (!(flags & RAM_SAVE_FLAG_EOS)); - return 0; +done: + DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n", + ret, seq_iter); + return ret; } #ifdef HAS_AUDIO From 8e21cd32005f6be7475349eaeadde8d4ec8cf2e4 Mon Sep 17 00:00:00 2001 From: Orit Wasserman Date: Tue, 19 Jun 2012 18:43:17 +0300 Subject: [PATCH 05/13] Add migration_end function Signed-off-by: Orit Wasserman --- arch_init.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch_init.c b/arch_init.c index 78b3c0c100..f10692873e 100644 --- a/arch_init.c +++ b/arch_init.c @@ -304,6 +304,11 @@ static void sort_ram_list(void) g_free(blocks); } +static void migration_end(void) +{ + memory_global_dirty_log_stop(); +} + int ram_save_live(QEMUFile *f, int stage, void *opaque) { ram_addr_t addr; @@ -313,7 +318,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) int ret; if (stage < 0) { - memory_global_dirty_log_stop(); + migration_end(); return 0; } From d5f8a5701d3690b5ec0c34b6a5c0b5a24d274540 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 21 May 2012 22:01:07 +0200 Subject: [PATCH 06/13] Add spent time for migration We add time spent for migration to the output of "info migrate" command. 'total_time' means time since the start of migration if migration is 'active', and total time of migration if migration is completed. 
As we are also interested in transferred ram when migration completes, adding all ram statistics Signed-off-by: Juan Quintela --- hmp.c | 2 ++ migration.c | 11 +++++++++++ migration.h | 1 + qapi-schema.json | 12 +++++++++--- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/hmp.c b/hmp.c index b9cec1dafb..4c6d4ae942 100644 --- a/hmp.c +++ b/hmp.c @@ -145,6 +145,8 @@ void hmp_info_migrate(Monitor *mon) info->ram->remaining >> 10); monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n", info->ram->total >> 10); + monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n", + info->ram->total_time); } if (info->has_disk) { diff --git a/migration.c b/migration.c index 810727fb2f..8db1b433f0 100644 --- a/migration.c +++ b/migration.c @@ -131,6 +131,8 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->transferred = ram_bytes_transferred(); info->ram->remaining = ram_bytes_remaining(); info->ram->total = ram_bytes_total(); + info->ram->total_time = qemu_get_clock_ms(rt_clock) + - s->total_time; if (blk_mig_active()) { info->has_disk = true; @@ -143,6 +145,13 @@ MigrationInfo *qmp_query_migrate(Error **errp) case MIG_STATE_COMPLETED: info->has_status = true; info->status = g_strdup("completed"); + + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_bytes_transferred(); + info->ram->remaining = 0; + info->ram->total = ram_bytes_total(); + info->ram->total_time = s->total_time; break; case MIG_STATE_ERROR: info->has_status = true; @@ -260,6 +269,7 @@ static void migrate_fd_put_ready(void *opaque) } else { migrate_fd_completed(s); } + s->total_time = qemu_get_clock_ms(rt_clock) - s->total_time; if (s->state != MIG_STATE_COMPLETED) { if (old_vm_running) { vm_start(); @@ -372,6 +382,7 @@ static MigrationState *migrate_init(const MigrationParams *params) s->bandwidth_limit = bandwidth_limit; s->state = MIG_STATE_SETUP; + s->total_time = qemu_get_clock_ms(rt_clock); return s; } diff --git a/migration.h 
b/migration.h index 35207bd53e..de13004573 100644 --- a/migration.h +++ b/migration.h @@ -37,6 +37,7 @@ struct MigrationState int (*write)(MigrationState *s, const void *buff, size_t size); void *opaque; MigrationParams params; + int64_t total_time; }; void process_incoming_migration(QEMUFile *f); diff --git a/qapi-schema.json b/qapi-schema.json index 3b6e3468b4..1ab5dbd5ff 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -260,10 +260,15 @@ # # @total: total amount of bytes involved in the migration process # +# @total_time: total amount of ms since migration started. If +# migration has ended, it returns the total migration +# time. (since 1.2) +# # Since: 0.14.0. ## { 'type': 'MigrationStats', - 'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' } } + 'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' , + 'total_time': 'int' } } ## # @MigrationInfo @@ -275,8 +280,9 @@ # 'cancelled'. If this field is not returned, no migration process # has been initiated # -# @ram: #optional @MigrationStats containing detailed migration status, -# only returned if status is 'active' +# @ram: #optional @MigrationStats containing detailed migration +# status, only returned if status is 'active' or +# 'completed'. 'completed' (since 1.2) # # @disk: #optional @MigrationStats containing detailed disk migration # status, only returned if status is 'active' and it is a block From 517a13c91a975987b4c2850c08078bd3cef4dce7 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 21 May 2012 23:46:44 +0200 Subject: [PATCH 07/13] Add tracepoints for savevm section start/end This allows to know how long each section takes to save. 
An awk script like this tells us sections that take more than 10ms $1 ~ /savevm_state_iterate_end/ { /* Print savevm_section_end line when > 10ms duration */ if ($2 > 10000) { printf("%s times_missing=%u\n", $0, times_missing++); } } Signed-off-by: Juan Quintela fix ws tracepoints Signed-off-by: Juan Quintela --- savevm.c | 8 ++++++++ trace-events | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/savevm.c b/savevm.c index d1d90200e1..987c6c0cd0 100644 --- a/savevm.c +++ b/savevm.c @@ -85,6 +85,7 @@ #include "cpus.h" #include "memory.h" #include "qmp-commands.h" +#include "trace.h" #define SELF_ANNOUNCE_ROUNDS 5 @@ -1625,11 +1626,14 @@ int qemu_savevm_state_iterate(QEMUFile *f) if (se->save_live_state == NULL) continue; + trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_PART); qemu_put_be32(f, se->section_id); ret = se->save_live_state(f, QEMU_VM_SECTION_PART, se->opaque); + trace_savevm_section_end(se->section_id); + if (ret <= 0) { /* Do not proceed to the next vmstate before this one reported completion of the current stage. 
This serializes the migration @@ -1659,11 +1663,13 @@ int qemu_savevm_state_complete(QEMUFile *f) if (se->save_live_state == NULL) continue; + trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_END); qemu_put_be32(f, se->section_id); ret = se->save_live_state(f, QEMU_VM_SECTION_END, se->opaque); + trace_savevm_section_end(se->section_id); if (ret < 0) { return ret; } @@ -1675,6 +1681,7 @@ int qemu_savevm_state_complete(QEMUFile *f) if (se->save_state == NULL && se->vmsd == NULL) continue; + trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_FULL); qemu_put_be32(f, se->section_id); @@ -1688,6 +1695,7 @@ int qemu_savevm_state_complete(QEMUFile *f) qemu_put_be32(f, se->version_id); vmstate_save(f, se); + trace_savevm_section_end(se->section_id); } qemu_put_byte(f, QEMU_VM_EOF); diff --git a/trace-events b/trace-events index c935ba24f4..d671118fe1 100644 --- a/trace-events +++ b/trace-events @@ -783,6 +783,11 @@ displaysurface_resize(void *display_state, void *display_surface, int width, int # vga.c ppm_save(const char *filename, void *display_surface) "%s surface=%p" +# savevm.c + +savevm_section_start(void) "" +savevm_section_end(unsigned int section_id) "section_id %u" + # hw/qxl.c disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d" disable qxl_io_write_vga(int qid, const char *mode, uint32_t addr, uint32_t val) "%d %s addr=%u val=%u" From aac844ed97c39b8c0fb16d7bf9851fdedf325be3 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 22 May 2012 00:38:26 +0200 Subject: [PATCH 08/13] No need to iterate if we already are over the limit If buffers are full, don't iterate, just exit. 
Signed-off-by: Juan Quintela --- savevm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/savevm.c b/savevm.c index 987c6c0cd0..a15c163b6e 100644 --- a/savevm.c +++ b/savevm.c @@ -1626,6 +1626,9 @@ int qemu_savevm_state_iterate(QEMUFile *f) if (se->save_live_state == NULL) continue; + if (qemu_file_rate_limit(f)) { + return 0; + } trace_savevm_section_start(); /* Section type */ qemu_put_byte(f, QEMU_VM_SECTION_PART); From d24981d37e793b0a8fcde1879db19eb11fe0f975 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 22 May 2012 00:42:40 +0200 Subject: [PATCH 09/13] Only TCG needs TLB handling Refactor the code that is only needed for tcg to an static function. Call that only when tcg is enabled. We can't refactor to a dummy function in the kvm case, as qemu can be compiled at the same time with tcg and kvm. Signed-off-by: Juan Quintela --- exec.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/exec.c b/exec.c index 8244d54a85..a68b65cb4e 100644 --- a/exec.c +++ b/exec.c @@ -1824,19 +1824,10 @@ void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr) TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); } -/* Note: start and end must be within the same ram block. */ -void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, - int dirty_flags) +static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end, + uintptr_t length) { - uintptr_t length, start1; - - start &= TARGET_PAGE_MASK; - end = TARGET_PAGE_ALIGN(end); - - length = end - start; - if (length == 0) - return; - cpu_physical_memory_mask_dirty_range(start, length, dirty_flags); + uintptr_t start1; /* we modify the TLB cache so that the dirty bit will be set again when accessing the range */ @@ -1848,6 +1839,26 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, abort(); } cpu_tlb_reset_dirty_all(start1, length); + +} + +/* Note: start and end must be within the same ram block. 
*/ +void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, + int dirty_flags) +{ + uintptr_t length; + + start &= TARGET_PAGE_MASK; + end = TARGET_PAGE_ALIGN(end); + + length = end - start; + if (length == 0) + return; + cpu_physical_memory_mask_dirty_range(start, length, dirty_flags); + + if (tcg_enabled()) { + tlb_reset_dirty_range_all(start, end, length); + } } int cpu_physical_memory_set_dirty_tracking(int enable) From 5b3c96388741a99988497d734edec19c4f995cd7 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 22 May 2012 00:44:24 +0200 Subject: [PATCH 10/13] Only calculate expected_time for stage 2 ram_save_remaining() is an expensive operation when there is a lot of memory. So we only call the function when we need it. Signed-off-by: Juan Quintela --- arch_init.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch_init.c b/arch_init.c index f10692873e..3633f4c0a6 100644 --- a/arch_init.c +++ b/arch_init.c @@ -314,7 +314,6 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) ram_addr_t addr; uint64_t bytes_transferred_last; double bwidth = 0; - uint64_t expected_time = 0; int ret; if (stage < 0) { @@ -391,12 +390,16 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; + if (stage == 2) { + uint64_t expected_time; + expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; - DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n", - expected_time, migrate_max_downtime()); + DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n", + expected_time, migrate_max_downtime()); - return (stage == 2) && (expected_time <= migrate_max_downtime()); + return expected_time <= migrate_max_downtime(); + } + return 0; } static inline void *host_from_stream_offset(QEMUFile *f, From 4508bd9ed8053cef0a1a849bf2f1896a5dd86580 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 22 
May 2012 16:27:59 +0200 Subject: [PATCH 11/13] Exit loop if we have been there too long Checking each 64 pages is a random magic number as good as any other. We don't want to test too many times, but on the other hand, qemu_get_clock_ns() is not so expensive either. We want to be sure that we spent less than 50ms (half of buffered_file timer), if we spent more than 100ms, all the accounting got wrong. Signed-off-by: Juan Quintela --- arch_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch_init.c b/arch_init.c index 3633f4c0a6..64b85fd1b1 100644 --- a/arch_init.c +++ b/arch_init.c @@ -309,12 +309,15 @@ static void migration_end(void) memory_global_dirty_log_stop(); } +#define MAX_WAIT 50 /* ms, half buffered_file limit */ + int ram_save_live(QEMUFile *f, int stage, void *opaque) { ram_addr_t addr; uint64_t bytes_transferred_last; double bwidth = 0; int ret; + int i; if (stage < 0) { migration_end(); @@ -354,6 +357,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) bytes_transferred_last = bytes_transferred; bwidth = qemu_get_clock_ns(rt_clock); + i = 0; while ((ret = qemu_file_rate_limit(f)) == 0) { int bytes_sent; @@ -362,6 +366,20 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque) if (bytes_sent == 0) { /* no more blocks */ break; } + /* we want to check in the 1st loop, just in case it was the 1st time + and we had to sync the dirty bitmap. + qemu_get_clock_ns() is a bit expensive, so we only check each some + iterations + */ + if ((i & 63) == 0) { + uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000; + if (t1 > MAX_WAIT) { + DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n", + t1, i); + break; + } + } + i++; } if (ret < 0) { From 1720aeee72888f80b974c33b6eb39922a0bea992 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 22 Jun 2012 13:14:17 +0200 Subject: [PATCH 12/13] dirty bitmap: abstract its use Always use accessors to read/set the dirty bitmap. 
Signed-off-by: Juan Quintela --- exec-obsolete.h | 44 ++++++++++++++++++++++---------------------- exec.c | 3 +-- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/exec-obsolete.h b/exec-obsolete.h index 792c831718..f8ffce607a 100644 --- a/exec-obsolete.h +++ b/exec-obsolete.h @@ -45,57 +45,61 @@ int cpu_physical_memory_set_dirty_tracking(int enable); #define CODE_DIRTY_FLAG 0x02 #define MIGRATION_DIRTY_FLAG 0x08 -/* read dirty bit (return 0 or 1) */ -static inline int cpu_physical_memory_is_dirty(ram_addr_t addr) -{ - return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff; -} - static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) { return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS]; } +/* read dirty bit (return 0 or 1) */ +static inline int cpu_physical_memory_is_dirty(ram_addr_t addr) +{ + return cpu_physical_memory_get_dirty_flags(addr) == 0xff; +} + static inline int cpu_physical_memory_get_dirty(ram_addr_t start, ram_addr_t length, int dirty_flags) { int ret = 0; - uint8_t *p; ram_addr_t addr, end; end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; - p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS); for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) { - ret |= *p++ & dirty_flags; + ret |= cpu_physical_memory_get_dirty_flags(addr) & dirty_flags; } return ret; } -static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) -{ - ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff; -} - static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr, int dirty_flags) { return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags; } +static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) +{ + cpu_physical_memory_set_dirty_flags(addr, 0xff); +} + +static inline int cpu_physical_memory_clear_dirty_flags(ram_addr_t addr, + int dirty_flags) +{ + int mask = ~dirty_flags; + + return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] &= mask; +} + static inline void 
cpu_physical_memory_set_dirty_range(ram_addr_t start, ram_addr_t length, int dirty_flags) { - uint8_t *p; ram_addr_t addr, end; end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; - p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS); for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) { - *p++ |= dirty_flags; + cpu_physical_memory_set_dirty_flags(addr, dirty_flags); } } @@ -103,16 +107,12 @@ static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start, ram_addr_t length, int dirty_flags) { - int mask; - uint8_t *p; ram_addr_t addr, end; end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; - mask = ~dirty_flags; - p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS); for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) { - *p++ &= mask; + cpu_physical_memory_clear_dirty_flags(addr, dirty_flags); } } diff --git a/exec.c b/exec.c index a68b65cb4e..dd4833d316 100644 --- a/exec.c +++ b/exec.c @@ -2565,8 +2565,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, ram_list.phys_dirty = g_realloc(ram_list.phys_dirty, last_ram_offset() >> TARGET_PAGE_BITS); - memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), - 0xff, size >> TARGET_PAGE_BITS); + cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff); if (kvm_enabled()) kvm_setup_guest_memory(new_block->host, size); From 45f33f01f359d6028ec4b4018a2bf2ff53806e11 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 22 Jun 2012 15:21:07 +0200 Subject: [PATCH 13/13] Maintain the number of dirty pages Calculate the number of dirty pages takes a lot on hosts with lots of memory. Just maintain how many pages are dirty. 
Signed-off-by: Juan Quintela --- arch_init.c | 15 +-------------- cpu-all.h | 1 + exec-obsolete.h | 10 ++++++++++ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch_init.c b/arch_init.c index 64b85fd1b1..5b0f5626a9 100644 --- a/arch_init.c +++ b/arch_init.c @@ -238,20 +238,7 @@ static uint64_t bytes_transferred; static ram_addr_t ram_save_remaining(void) { - RAMBlock *block; - ram_addr_t count = 0; - - QLIST_FOREACH(block, &ram_list.blocks, next) { - ram_addr_t addr; - for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { - if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { - count++; - } - } - } - - return count; + return ram_list.dirty_pages; } uint64_t ram_bytes_remaining(void) diff --git a/cpu-all.h b/cpu-all.h index 9dc249a165..82ba1d7cd5 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -486,6 +486,7 @@ typedef struct RAMBlock { typedef struct RAMList { uint8_t *phys_dirty; QLIST_HEAD(, RAMBlock) blocks; + uint64_t dirty_pages; } RAMList; extern RAMList ram_list; diff --git a/exec-obsolete.h b/exec-obsolete.h index f8ffce607a..c09925610d 100644 --- a/exec-obsolete.h +++ b/exec-obsolete.h @@ -74,6 +74,11 @@ static inline int cpu_physical_memory_get_dirty(ram_addr_t start, static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr, int dirty_flags) { + if ((dirty_flags & MIGRATION_DIRTY_FLAG) && + !cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE, + MIGRATION_DIRTY_FLAG)) { + ram_list.dirty_pages++; + } return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags; } @@ -87,6 +92,11 @@ static inline int cpu_physical_memory_clear_dirty_flags(ram_addr_t addr, { int mask = ~dirty_flags; + if ((dirty_flags & MIGRATION_DIRTY_FLAG) && + cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE, + MIGRATION_DIRTY_FLAG)) { + ram_list.dirty_pages--; + } return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] &= mask; }