migration/next for 20170922

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABCAAGBQJZxP7KAAoJEPSH7xhYctcjn74P/10/uZuHoN06MiogXcLepb1w
 +cgFRN/FpIYaR4mgtFVaEPFdu4o0Zg3Yb7F1nuAyLXhU3Qao9YZZcwq7FAJnqNSw
 BEsjo2Rnw2gtwFkzrcOtSPvnO+uLjdBANZhUy5uL8AT5zMt3to/k92sp+dqj2n29
 DYDEE091gaxlj0HT5RC636P8BzJoPAQe+AvpC3SJ8uAZLjWHtUP0GBpcPRl7pBl/
 GjsgjzqEjmm5jx0/iUmOW+P99NzI33mgki2tZpEgQMs2HSgKCllVVkMBvSZg71jC
 PpF3vlDgVkA7FJo4u5MYDxNtNBBwcZLpptzIlHcIaB7Xk1+dByplXSoDErZZ84NL
 E+2zMwcduif/J42nit62Y/oIEpnvkitct93+iDNDA1CoQvOjBJImF43MTbJyhx6S
 xFL8Ttfo6/AXBB8oQlLC5n/FI+PcaVb+6hCSWidrivkx6ZuZ2oDzSJrt49ayQL6a
 b0u/H8FylXg3+kjIJwUvuhxr7Tak1n0g23jmFwqPGc5DwiQ//hK2dzzUxnF59cjO
 a2TLYWqHfp8TGF/UQf1sPlMT9jVfe7dl03eVPtA9rXwNpfPv0T9BrF6kJHBIitrJ
 i7jok+G9XyKN+3xn1LC/WzIsNl1uIQz2A4xi5JMS9AJnNtqFC3cR128jkzLaGvew
 qe14sIuDCXJda3epsxKf
 =AbaO
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170922-1' into staging

migration/next for 20170922

# gpg: Signature made Fri 22 Sep 2017 13:15:06 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>"
# gpg:                 aka "Juan Quintela <quintela@trasno.org>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20170922-1:
  migration: split ufd_version_check onto receive/request features part
  migration: fix hardcoded function name in error report
  migration: pass MigrationIncomingState* into migration check functions
  migration: split common postcopy out of ram postcopy
  migration: fix ram_save_pending
  migration: add has_postcopy savevm handler
  bitmap: provide to_le/from_le helpers
  bitmap: introduce bitmap_count_one()
  bitmap: remove BITOP_WORD()
  migration: Split migration_fd_process_incoming
  migration: Create multifd migration threads
  migration: Create x-multifd-page-count parameter
  migration: Create x-multifd-channels parameter
  migration: Add multifd capability
  migration: Create migration_has_all_channels
  migration: Add comments to channel functions
  migration: Teach it about G_SOURCE_REMOVE
  migration: Create migration_ioc_process_incoming()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2017-09-22 14:04:10 +01:00
commit bef81b3eb5
18 changed files with 666 additions and 60 deletions

14
hmp.c
View file

@ -336,6 +336,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%s: %s\n",
MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL),
params->block_incremental ? "on" : "off");
monitor_printf(mon, "%s: %" PRId64 "\n",
MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_CHANNELS),
params->x_multifd_channels);
monitor_printf(mon, "%s: %" PRId64 "\n",
MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT),
params->x_multifd_page_count);
}
qapi_free_MigrationParameters(params);
@ -1621,6 +1627,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_block_incremental = true;
visit_type_bool(v, param, &p->block_incremental, &err);
break;
case MIGRATION_PARAMETER_X_MULTIFD_CHANNELS:
p->has_x_multifd_channels = true;
visit_type_int(v, param, &p->x_multifd_channels, &err);
break;
case MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT:
p->has_x_multifd_page_count = true;
visit_type_int(v, param, &p->x_multifd_page_count, &err);
break;
default:
assert(0);
}

View file

@ -223,6 +223,8 @@ static inline gboolean g_hash_table_contains(GHashTable *hash_table,
{
return g_hash_table_lookup_extended(hash_table, key, NULL, NULL);
}
#define G_SOURCE_CONTINUE TRUE
#define G_SOURCE_REMOVE FALSE
#endif
#ifndef g_assert_true

View file

@ -24,6 +24,7 @@ typedef struct SaveVMHandlers {
/* This runs both outside and inside the iothread lock. */
bool (*is_active)(void *opaque);
bool (*has_postcopy)(void *opaque);
/* This runs outside the iothread lock in the migration case, and
* within the lock in the savevm case. The callback had better only

View file

@ -39,6 +39,8 @@
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_test_and_clear_atomic(dst, pos, nbits) Test and clear area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
* bitmap_to_le(dst, src, nbits) Convert bitmap to little endian
* bitmap_from_le(dst, src, nbits) Convert bitmap from little endian
*/
/*
@ -82,6 +84,7 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, long bits);
int slow_bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, long bits);
long slow_bitmap_count_one(const unsigned long *bitmap, long nbits);
static inline unsigned long *bitmap_try_new(long nbits)
{
@ -216,6 +219,15 @@ static inline int bitmap_intersects(const unsigned long *src1,
}
}
static inline long bitmap_count_one(const unsigned long *bitmap, long nbits)
{
if (small_nbits(nbits)) {
return ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits));
} else {
return slow_bitmap_count_one(bitmap, nbits);
}
}
void bitmap_set(unsigned long *map, long i, long len);
void bitmap_set_atomic(unsigned long *map, long i, long len);
void bitmap_clear(unsigned long *map, long start, long nr);
@ -237,4 +249,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old,
return new;
}
void bitmap_to_le(unsigned long *dst, const unsigned long *src,
long nbits);
void bitmap_from_le(unsigned long *dst, const unsigned long *src,
long nbits);
#endif /* BITMAP_H */

View file

@ -19,6 +19,14 @@
#include "qapi/error.h"
#include "io/channel-tls.h"
/**
* @migration_channel_process_incoming - Create new incoming migration channel
*
* Notice that TLS is special. For it we listen in a listener socket,
* and then create a new client socket from the TLS library.
*
* @ioc: Channel to which we are connecting
*/
void migration_channel_process_incoming(QIOChannel *ioc)
{
MigrationState *s = migrate_get_current();
@ -36,12 +44,18 @@ void migration_channel_process_incoming(QIOChannel *ioc)
error_report_err(local_err);
}
} else {
QEMUFile *f = qemu_fopen_channel_input(ioc);
migration_fd_process_incoming(f);
migration_ioc_process_incoming(ioc);
}
}
/**
* @migration_channel_connect - Create new outgoing migration channel
*
* @s: Current migration state
* @ioc: Channel to which we are connecting
* @hostname: Where we want to connect
*/
void migration_channel_connect(MigrationState *s,
QIOChannel *ioc,
const char *hostname)

View file

@ -49,7 +49,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc,
{
migration_channel_process_incoming(ioc);
object_unref(OBJECT(ioc));
return FALSE; /* unregister */
return G_SOURCE_REMOVE;
}
void exec_start_incoming_migration(const char *command, Error **errp)

View file

@ -49,7 +49,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
{
migration_channel_process_incoming(ioc);
object_unref(OBJECT(ioc));
return FALSE; /* unregister */
return G_SOURCE_REMOVE;
}
void fd_start_incoming_migration(const char *infd, Error **errp)

View file

@ -77,6 +77,8 @@
* Note: Please change this default value to 10000 when we support hybrid mode.
*/
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@ -279,6 +281,10 @@ static void process_incoming_migration_bh(void *opaque)
*/
qemu_announce_self();
if (multifd_load_cleanup(&local_err) != 0) {
error_report_err(local_err);
autostart = false;
}
/* If global state section was not received or we are in running
state, we need to obey autostart. Any other state is set with
runstate_set. */
@ -306,17 +312,16 @@ static void process_incoming_migration_bh(void *opaque)
static void process_incoming_migration_co(void *opaque)
{
QEMUFile *f = opaque;
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyState ps;
int ret;
mis->from_src_file = f;
assert(mis->from_src_file);
mis->largest_page_size = qemu_ram_pagesize_largest();
postcopy_state_set(POSTCOPY_INCOMING_NONE);
migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
MIGRATION_STATUS_ACTIVE);
ret = qemu_loadvm_state(f);
ret = qemu_loadvm_state(mis->from_src_file);
ps = postcopy_state_get();
trace_process_incoming_migration_co_end(ret, ps);
@ -352,22 +357,69 @@ static void process_incoming_migration_co(void *opaque)
}
if (ret < 0) {
Error *local_err = NULL;
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
error_report("load of migration failed: %s", strerror(-ret));
qemu_fclose(mis->from_src_file);
if (multifd_load_cleanup(&local_err) != 0) {
error_report_err(local_err);
}
exit(EXIT_FAILURE);
}
mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
qemu_bh_schedule(mis->bh);
}
static void migration_incoming_setup(QEMUFile *f)
{
MigrationIncomingState *mis = migration_incoming_get_current();
if (multifd_load_setup() != 0) {
/* We haven't been able to create multifd threads
nothing better to do */
exit(EXIT_FAILURE);
}
if (!mis->from_src_file) {
mis->from_src_file = f;
}
qemu_file_set_blocking(f, false);
}
static void migration_incoming_process(void)
{
Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
qemu_coroutine_enter(co);
}
void migration_fd_process_incoming(QEMUFile *f)
{
Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f);
migration_incoming_setup(f);
migration_incoming_process();
}
qemu_file_set_blocking(f, false);
qemu_coroutine_enter(co);
void migration_ioc_process_incoming(QIOChannel *ioc)
{
MigrationIncomingState *mis = migration_incoming_get_current();
if (!mis->from_src_file) {
QEMUFile *f = qemu_fopen_channel_input(ioc);
migration_fd_process_incoming(f);
}
/* We still only have a single channel. Nothing to do here yet */
}
/**
* @migration_has_all_channels: We have received all channels that we need
*
* Returns true when we have got connections to all the channels that
* we need for migration.
*/
bool migration_has_all_channels(void)
{
return true;
}
/*
@ -456,6 +508,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
params->has_block_incremental = true;
params->block_incremental = s->parameters.block_incremental;
params->has_x_multifd_channels = true;
params->x_multifd_channels = s->parameters.x_multifd_channels;
params->has_x_multifd_page_count = true;
params->x_multifd_page_count = s->parameters.x_multifd_page_count;
return params;
}
@ -603,6 +659,7 @@ static bool migrate_caps_check(bool *cap_list,
{
MigrationCapabilityStatusList *cap;
bool old_postcopy_cap;
MigrationIncomingState *mis = migration_incoming_get_current();
old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
@ -636,7 +693,7 @@ static bool migrate_caps_check(bool *cap_list,
* special support.
*/
if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
!postcopy_ram_supported_by_host()) {
!postcopy_ram_supported_by_host(mis)) {
/* postcopy_ram_supported_by_host will have emitted a more
* detailed message
*/
@ -737,6 +794,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
"is invalid, it should be positive");
return false;
}
if (params->has_x_multifd_channels &&
(params->x_multifd_channels < 1 || params->x_multifd_channels > 255)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"multifd_channels",
"is invalid, it should be in the range of 1 to 255");
return false;
}
if (params->has_x_multifd_page_count &&
(params->x_multifd_page_count < 1 ||
params->x_multifd_page_count > 10000)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"multifd_page_count",
"is invalid, it should be in the range of 1 to 10000");
return false;
}
return true;
}
@ -855,6 +927,12 @@ static void migrate_params_apply(MigrateSetParameters *params)
if (params->has_block_incremental) {
s->parameters.block_incremental = params->block_incremental;
}
if (params->has_x_multifd_channels) {
s->parameters.x_multifd_channels = params->x_multifd_channels;
}
if (params->has_x_multifd_page_count) {
s->parameters.x_multifd_page_count = params->x_multifd_page_count;
}
}
void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@ -968,6 +1046,8 @@ static void migrate_fd_cleanup(void *opaque)
s->cleanup_bh = NULL;
if (s->to_dst_file) {
Error *local_err = NULL;
trace_migrate_fd_cleanup();
qemu_mutex_unlock_iothread();
if (s->migration_thread_running) {
@ -976,6 +1056,9 @@ static void migrate_fd_cleanup(void *opaque)
}
qemu_mutex_lock_iothread();
if (multifd_save_cleanup(&local_err) != 0) {
error_report_err(local_err);
}
qemu_fclose(s->to_dst_file);
s->to_dst_file = NULL;
}
@ -1361,6 +1444,11 @@ bool migrate_postcopy_ram(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}
bool migrate_postcopy(void)
{
return migrate_postcopy_ram();
}
bool migrate_auto_converge(void)
{
MigrationState *s;
@ -1424,6 +1512,33 @@ bool migrate_use_events(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}
bool migrate_use_multifd(void)
{
MigrationState *s;
s = migrate_get_current();
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
}
int migrate_multifd_channels(void)
{
MigrationState *s;
s = migrate_get_current();
return s->parameters.x_multifd_channels;
}
int migrate_multifd_page_count(void)
{
MigrationState *s;
s = migrate_get_current();
return s->parameters.x_multifd_page_count;
}
int migrate_use_xbzrle(void)
{
MigrationState *s;
@ -1717,9 +1832,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
* need to tell the destination to throw any pages it's already received
* that are dirty
*/
if (ram_postcopy_send_discard_bitmap(ms)) {
error_report("postcopy send discard bitmap failed");
goto fail;
if (migrate_postcopy_ram()) {
if (ram_postcopy_send_discard_bitmap(ms)) {
error_report("postcopy send discard bitmap failed");
goto fail;
}
}
/*
@ -1728,8 +1845,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
* wrap their state up here
*/
qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
/* Ping just for debugging, helps line traces up */
qemu_savevm_send_ping(ms->to_dst_file, 2);
if (migrate_postcopy_ram()) {
/* Ping just for debugging, helps line traces up */
qemu_savevm_send_ping(ms->to_dst_file, 2);
}
/*
* While loading the device state we may trigger page transfer
@ -1754,7 +1873,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
qemu_savevm_send_postcopy_listen(fb);
qemu_savevm_state_complete_precopy(fb, false, false);
qemu_savevm_send_ping(fb, 3);
if (migrate_postcopy_ram()) {
qemu_savevm_send_ping(fb, 3);
}
qemu_savevm_send_postcopy_run(fb);
@ -1789,11 +1910,13 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
qemu_mutex_unlock_iothread();
/*
* Although this ping is just for debug, it could potentially be
* used for getting a better measurement of downtime at the source.
*/
qemu_savevm_send_ping(ms->to_dst_file, 4);
if (migrate_postcopy_ram()) {
/*
* Although this ping is just for debug, it could potentially be
* used for getting a better measurement of downtime at the source.
*/
qemu_savevm_send_ping(ms->to_dst_file, 4);
}
if (migrate_release_ram()) {
ram_postcopy_migrated_memory_release(ms);
@ -1971,7 +2094,7 @@ static void *migration_thread(void *opaque)
qemu_savevm_send_ping(s->to_dst_file, 1);
}
if (migrate_postcopy_ram()) {
if (migrate_postcopy()) {
/*
* Tell the destination that we *might* want to do postcopy later;
* if the other end can't do postcopy it should fail now, nice and
@ -2004,7 +2127,7 @@ static void *migration_thread(void *opaque)
if (pending_size && pending_size >= threshold_size) {
/* Still a significant amount to transfer */
if (migrate_postcopy_ram() &&
if (migrate_postcopy() &&
s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
pend_nonpost <= threshold_size &&
atomic_read(&s->start_postcopy)) {
@ -2138,6 +2261,12 @@ void migrate_fd_connect(MigrationState *s)
}
}
if (multifd_save_setup() != 0) {
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_FAILED);
migrate_fd_cleanup(s);
return;
}
qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
QEMU_THREAD_JOINABLE);
s->migration_thread_running = true;
@ -2189,6 +2318,12 @@ static Property migration_properties[] = {
DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState,
parameters.x_checkpoint_delay,
DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
DEFINE_PROP_INT64("x-multifd-channels", MigrationState,
parameters.x_multifd_channels,
DEFAULT_MIGRATE_MULTIFD_CHANNELS),
DEFINE_PROP_INT64("x-multifd-page-count", MigrationState,
parameters.x_multifd_page_count,
DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT),
/* Migration capabilities */
DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
@ -2202,6 +2337,7 @@ static Property migration_properties[] = {
DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD),
DEFINE_PROP_END_OF_LIST(),
};
@ -2245,6 +2381,8 @@ static void migration_instance_init(Object *obj)
params->has_downtime_limit = true;
params->has_x_checkpoint_delay = true;
params->has_block_incremental = true;
params->has_x_multifd_channels = true;
params->has_x_multifd_page_count = true;
}
/*

View file

@ -20,6 +20,7 @@
#include "exec/cpu-common.h"
#include "qemu/coroutine_int.h"
#include "hw/qdev.h"
#include "io/channel.h"
/* State for the incoming migration */
struct MigrationIncomingState {
@ -152,6 +153,9 @@ struct MigrationState
void migrate_set_state(int *state, int old_state, int new_state);
void migration_fd_process_incoming(QEMUFile *f);
void migration_ioc_process_incoming(QIOChannel *ioc);
bool migration_has_all_channels(void);
uint64_t migrate_max_downtime(void);
@ -165,11 +169,16 @@ bool migration_is_blocked(Error **errp);
bool migration_in_postcopy(void);
MigrationState *migrate_get_current(void);
bool migrate_postcopy(void);
bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
int migrate_multifd_channels(void);
int migrate_multifd_page_count(void);
int migrate_use_xbzrle(void);
int64_t migrate_xbzrle_cache_size(void);

View file

@ -61,15 +61,66 @@ struct PostcopyDiscardState {
#include <sys/eventfd.h>
#include <linux/userfaultfd.h>
static bool ufd_version_check(int ufd)
{
struct uffdio_api api_struct;
uint64_t ioctl_mask;
/**
* receive_ufd_features: check userfault fd features, to request only supported
* features in the future.
*
* Returns: true on success
*
* __NR_userfaultfd - should be checked before
* @features: out parameter will contain uffdio_api.features provided by kernel
* in case of success
*/
static bool receive_ufd_features(uint64_t *features)
{
struct uffdio_api api_struct = {0};
int ufd;
bool ret = true;
/* if we are here __NR_userfaultfd should exists */
ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
if (ufd == -1) {
error_report("%s: syscall __NR_userfaultfd failed: %s", __func__,
strerror(errno));
return false;
}
/* ask features */
api_struct.api = UFFD_API;
api_struct.features = 0;
if (ioctl(ufd, UFFDIO_API, &api_struct)) {
error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
error_report("%s: UFFDIO_API failed: %s", __func__,
strerror(errno));
ret = false;
goto release_ufd;
}
*features = api_struct.features;
release_ufd:
close(ufd);
return ret;
}
/**
* request_ufd_features: this function should be called only once on a newly
* opened ufd, subsequent calls will lead to error.
*
* Returns: true on succes
*
* @ufd: fd obtained from userfaultfd syscall
* @features: bit mask see UFFD_API_FEATURES
*/
static bool request_ufd_features(int ufd, uint64_t features)
{
struct uffdio_api api_struct = {0};
uint64_t ioctl_mask;
api_struct.api = UFFD_API;
api_struct.features = features;
if (ioctl(ufd, UFFDIO_API, &api_struct)) {
error_report("%s failed: UFFDIO_API failed: %s", __func__,
strerror(errno));
return false;
}
@ -82,11 +133,42 @@ static bool ufd_version_check(int ufd)
return false;
}
return true;
}
static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
{
uint64_t asked_features = 0;
static uint64_t supported_features;
/*
* it's not possible to
* request UFFD_API twice per one fd
* userfault fd features is persistent
*/
if (!supported_features) {
if (!receive_ufd_features(&supported_features)) {
error_report("%s failed", __func__);
return false;
}
}
/*
* request features, even if asked_features is 0, due to
* kernel expects UFFD_API before UFFDIO_REGISTER, per
* userfault file descriptor
*/
if (!request_ufd_features(ufd, asked_features)) {
error_report("%s failed: features %" PRIu64, __func__,
asked_features);
return false;
}
if (getpagesize() != ram_pagesize_summary()) {
bool have_hp = false;
/* We've got a huge page */
#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS;
#endif
if (!have_hp) {
error_report("Userfault on this host does not support huge pages");
@ -124,7 +206,7 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
* normally fine since if the postcopy succeeds it gets turned back on at the
* end.
*/
bool postcopy_ram_supported_by_host(void)
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
{
long pagesize = getpagesize();
int ufd = -1;
@ -147,7 +229,7 @@ bool postcopy_ram_supported_by_host(void)
}
/* Version and features check */
if (!ufd_version_check(ufd)) {
if (!ufd_check_and_apply(ufd, mis)) {
goto out;
}
@ -523,7 +605,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
* Although the host check already tested the API, we need to
* do the check again as an ABI handshake on the new fd.
*/
if (!ufd_version_check(mis->userfault_fd)) {
if (!ufd_check_and_apply(mis->userfault_fd, mis)) {
return -1;
}
@ -661,7 +743,7 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
#else
/* No target OS support, stubs just fail */
bool postcopy_ram_supported_by_host(void)
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
{
error_report("%s: No OS support", __func__);
return false;

View file

@ -14,7 +14,7 @@
#define QEMU_POSTCOPY_RAM_H
/* Return true if the host supports everything we need to do postcopy-ram */
bool postcopy_ram_supported_by_host(void);
bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
/*
* Make all of RAM sensitive to accesses to areas that haven't yet been written

View file

@ -356,6 +356,208 @@ static void compress_threads_save_setup(void)
}
}
/* Multiple fd's */
struct MultiFDSendParams {
uint8_t id;
char *name;
QemuThread thread;
QemuSemaphore sem;
QemuMutex mutex;
bool quit;
};
typedef struct MultiFDSendParams MultiFDSendParams;
struct {
MultiFDSendParams *params;
/* number of created threads */
int count;
} *multifd_send_state;
static void terminate_multifd_send_threads(Error *errp)
{
int i;
for (i = 0; i < multifd_send_state->count; i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
qemu_mutex_lock(&p->mutex);
p->quit = true;
qemu_sem_post(&p->sem);
qemu_mutex_unlock(&p->mutex);
}
}
int multifd_save_cleanup(Error **errp)
{
int i;
int ret = 0;
if (!migrate_use_multifd()) {
return 0;
}
terminate_multifd_send_threads(NULL);
for (i = 0; i < multifd_send_state->count; i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
qemu_thread_join(&p->thread);
qemu_mutex_destroy(&p->mutex);
qemu_sem_destroy(&p->sem);
g_free(p->name);
p->name = NULL;
}
g_free(multifd_send_state->params);
multifd_send_state->params = NULL;
g_free(multifd_send_state);
multifd_send_state = NULL;
return ret;
}
static void *multifd_send_thread(void *opaque)
{
MultiFDSendParams *p = opaque;
while (true) {
qemu_mutex_lock(&p->mutex);
if (p->quit) {
qemu_mutex_unlock(&p->mutex);
break;
}
qemu_mutex_unlock(&p->mutex);
qemu_sem_wait(&p->sem);
}
return NULL;
}
int multifd_save_setup(void)
{
int thread_count;
uint8_t i;
if (!migrate_use_multifd()) {
return 0;
}
thread_count = migrate_multifd_channels();
multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
multifd_send_state->count = 0;
for (i = 0; i < thread_count; i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
qemu_mutex_init(&p->mutex);
qemu_sem_init(&p->sem, 0);
p->quit = false;
p->id = i;
p->name = g_strdup_printf("multifdsend_%d", i);
qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
QEMU_THREAD_JOINABLE);
multifd_send_state->count++;
}
return 0;
}
struct MultiFDRecvParams {
uint8_t id;
char *name;
QemuThread thread;
QemuSemaphore sem;
QemuMutex mutex;
bool quit;
};
typedef struct MultiFDRecvParams MultiFDRecvParams;
struct {
MultiFDRecvParams *params;
/* number of created threads */
int count;
} *multifd_recv_state;
static void terminate_multifd_recv_threads(Error *errp)
{
int i;
for (i = 0; i < multifd_recv_state->count; i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
qemu_mutex_lock(&p->mutex);
p->quit = true;
qemu_sem_post(&p->sem);
qemu_mutex_unlock(&p->mutex);
}
}
int multifd_load_cleanup(Error **errp)
{
int i;
int ret = 0;
if (!migrate_use_multifd()) {
return 0;
}
terminate_multifd_recv_threads(NULL);
for (i = 0; i < multifd_recv_state->count; i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
qemu_thread_join(&p->thread);
qemu_mutex_destroy(&p->mutex);
qemu_sem_destroy(&p->sem);
g_free(p->name);
p->name = NULL;
}
g_free(multifd_recv_state->params);
multifd_recv_state->params = NULL;
g_free(multifd_recv_state);
multifd_recv_state = NULL;
return ret;
}
static void *multifd_recv_thread(void *opaque)
{
MultiFDRecvParams *p = opaque;
while (true) {
qemu_mutex_lock(&p->mutex);
if (p->quit) {
qemu_mutex_unlock(&p->mutex);
break;
}
qemu_mutex_unlock(&p->mutex);
qemu_sem_wait(&p->sem);
}
return NULL;
}
int multifd_load_setup(void)
{
int thread_count;
uint8_t i;
if (!migrate_use_multifd()) {
return 0;
}
thread_count = migrate_multifd_channels();
multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
multifd_recv_state->count = 0;
for (i = 0; i < thread_count; i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
qemu_mutex_init(&p->mutex);
qemu_sem_init(&p->sem, 0);
p->quit = false;
p->id = i;
p->name = g_strdup_printf("multifdrecv_%d", i);
qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
QEMU_THREAD_JOINABLE);
multifd_recv_state->count++;
}
return 0;
}
/**
* save_page_header: write page header to wire
*
@ -2074,8 +2276,12 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
}
/* We can do postcopy, and all the data is postcopiable */
*postcopiable_pending += remaining_size;
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
*postcopiable_pending += remaining_size;
} else {
*non_postcopiable_pending += remaining_size;
}
}
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
@ -2647,11 +2853,17 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
return ret;
}
static bool ram_has_postcopy(void *opaque)
{
return migrate_postcopy_ram();
}
static SaveVMHandlers savevm_ram_handlers = {
.save_setup = ram_save_setup,
.save_live_iterate = ram_save_iterate,
.save_live_complete_postcopy = ram_save_complete,
.save_live_complete_precopy = ram_save_complete,
.has_postcopy = ram_has_postcopy,
.save_live_pending = ram_save_pending,
.load_state = ram_load,
.save_cleanup = ram_save_cleanup,

View file

@ -39,6 +39,11 @@ int64_t xbzrle_cache_resize(int64_t new_size);
uint64_t ram_bytes_remaining(void);
uint64_t ram_bytes_total(void);
int multifd_save_setup(void);
int multifd_save_cleanup(Error **errp);
int multifd_load_setup(void);
int multifd_load_cleanup(Error **errp);
uint64_t ram_pagesize_summary(void);
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len);
void acct_update_position(QEMUFile *f, size_t size, bool zero);

View file

@ -89,7 +89,7 @@ static struct mig_cmd_args {
[MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
[MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
[MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
[MIG_CMD_POSTCOPY_ADVISE] = { .len = 16, .name = "POSTCOPY_ADVISE" },
[MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" },
[MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
[MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
[MIG_CMD_POSTCOPY_RAM_DISCARD] = {
@ -98,6 +98,23 @@ static struct mig_cmd_args {
[MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
};
/* Note for MIG_CMD_POSTCOPY_ADVISE:
* The format of arguments is depending on postcopy mode:
* - postcopy RAM only
* uint64_t host page size
* uint64_t taget page size
*
* - postcopy RAM and postcopy dirty bitmaps
* format is the same as for postcopy RAM only
*
* - postcopy dirty bitmaps only
* Nothing. Command length field is 0.
*
* Be careful: adding a new postcopy entity with some other parameters should
* not break format self-description ability. Good way is to introduce some
* generic extendable format with an exception for two old entities.
*/
static int announce_self_create(uint8_t *buf,
uint8_t *mac_addr)
{
@ -861,12 +878,17 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
uint64_t tmp[2];
tmp[0] = cpu_to_be64(ram_pagesize_summary());
tmp[1] = cpu_to_be64(qemu_target_page_size());
if (migrate_postcopy_ram()) {
uint64_t tmp[2];
tmp[0] = cpu_to_be64(ram_pagesize_summary());
tmp[1] = cpu_to_be64(qemu_target_page_size());
trace_qemu_savevm_send_postcopy_advise();
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
trace_qemu_savevm_send_postcopy_advise();
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
16, (uint8_t *)tmp);
} else {
qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
}
}
/* Sent prior to starting the destination running in postcopy, discard pages
@ -1008,7 +1030,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
* call that's already run, it might get confused if we call
* iterate afterwards.
*/
if (postcopy && !se->ops->save_live_complete_postcopy) {
if (postcopy &&
!(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
continue;
}
if (qemu_file_rate_limit(f)) {
@ -1097,7 +1120,8 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops ||
(in_postcopy && se->ops->save_live_complete_postcopy) ||
(in_postcopy && se->ops->has_postcopy &&
se->ops->has_postcopy(se->opaque)) ||
(in_postcopy && !iterable_only) ||
!se->ops->save_live_complete_precopy) {
continue;
@ -1352,7 +1376,11 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
return -1;
}
if (!postcopy_ram_supported_by_host()) {
if (!migrate_postcopy_ram()) {
return 0;
}
if (!postcopy_ram_supported_by_host(mis)) {
postcopy_state_set(POSTCOPY_INCOMING_NONE);
return -1;
}
@ -1562,7 +1590,9 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* A rare case, we entered listen without having to do any discards,
* so do the setup that's normally done at the time of the 1st discard.
*/
postcopy_ram_prepare_discard(mis);
if (migrate_postcopy_ram()) {
postcopy_ram_prepare_discard(mis);
}
}
/*
@ -1570,8 +1600,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
* However, at this point the CPU shouldn't be running, and the IO
* shouldn't be doing anything yet so don't actually expect requests
*/
if (postcopy_ram_enable_notify(mis)) {
return -1;
if (migrate_postcopy_ram()) {
if (postcopy_ram_enable_notify(mis)) {
return -1;
}
}
if (mis->have_listen_thread) {

View file

@ -152,9 +152,13 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc,
object_unref(OBJECT(sioc));
out:
/* Close listening socket as its no longer needed */
qio_channel_close(ioc, NULL);
return FALSE; /* unregister */
if (migration_has_all_channels()) {
/* Close listening socket as its no longer needed */
qio_channel_close(ioc, NULL);
return G_SOURCE_REMOVE;
} else {
return G_SOURCE_CONTINUE;
}
}

View file

@ -341,12 +341,14 @@
# @return-path: If enabled, migration will use the return path even
# for precopy. (since 2.10)
#
# @x-multifd: Use more than one fd for migration (since 2.11)
#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
'block', 'return-path' ] }
'block', 'return-path', 'x-multifd' ] }
##
# @MigrationCapabilityStatus:
@ -464,13 +466,22 @@
# migrated and the destination must already have access to the
# same backing chain as was used on the source. (since 2.10)
#
# @x-multifd-channels: Number of channels used to migrate data in
# parallel. This is the same number that the
# number of sockets used for migration. The
# default value is 2 (since 2.11)
#
# @x-multifd-page-count: Number of pages sent together to a thread
# The default value is 16 (since 2.11)
#
# Since: 2.4
##
{ 'enum': 'MigrationParameter',
'data': ['compress-level', 'compress-threads', 'decompress-threads',
'cpu-throttle-initial', 'cpu-throttle-increment',
'tls-creds', 'tls-hostname', 'max-bandwidth',
'downtime-limit', 'x-checkpoint-delay', 'block-incremental' ] }
'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
'x-multifd-channels', 'x-multifd-page-count' ] }
##
# @MigrateSetParameters:
@ -526,6 +537,14 @@
# migrated and the destination must already have access to the
# same backing chain as was used on the source. (since 2.10)
#
# @x-multifd-channels: Number of channels used to migrate data in
# parallel. This is the same number that the
# number of sockets used for migration. The
# default value is 2 (since 2.11)
#
# @x-multifd-page-count: Number of pages sent together to a thread
# The default value is 16 (since 2.11)
#
# Since: 2.4
##
# TODO either fuse back into MigrationParameters, or make
@ -541,7 +560,9 @@
'*max-bandwidth': 'int',
'*downtime-limit': 'int',
'*x-checkpoint-delay': 'int',
'*block-incremental': 'bool' } }
'*block-incremental': 'bool',
'*x-multifd-channels': 'int',
'*x-multifd-page-count': 'int' } }
##
# @migrate-set-parameters:
@ -612,6 +633,14 @@
# migrated and the destination must already have access to the
# same backing chain as was used on the source. (since 2.10)
#
# @x-multifd-channels: Number of channels used to migrate data in
# parallel. This is the same number that the
# number of sockets used for migration.
# The default value is 2 (since 2.11)
#
# @x-multifd-page-count: Number of pages sent together to a thread
# The default value is 16 (since 2.11)
#
# Since: 2.4
##
{ 'struct': 'MigrationParameters',
@ -625,7 +654,9 @@
'*max-bandwidth': 'int',
'*downtime-limit': 'int',
'*x-checkpoint-delay': 'int',
'*block-incremental': 'bool' } }
'*block-incremental': 'bool' ,
'*x-multifd-channels': 'int',
'*x-multifd-page-count': 'int' } }
##
# @query-migrate-parameters:

View file

@ -355,3 +355,50 @@ int slow_bitmap_intersects(const unsigned long *bitmap1,
}
return 0;
}
long slow_bitmap_count_one(const unsigned long *bitmap, long nbits)
{
long k, lim = nbits / BITS_PER_LONG, result = 0;
for (k = 0; k < lim; k++) {
result += ctpopl(bitmap[k]);
}
if (nbits % BITS_PER_LONG) {
result += ctpopl(bitmap[k] & BITMAP_LAST_WORD_MASK(nbits));
}
return result;
}
static void bitmap_to_from_le(unsigned long *dst,
const unsigned long *src, long nbits)
{
long len = BITS_TO_LONGS(nbits);
#ifdef HOST_WORDS_BIGENDIAN
long index;
for (index = 0; index < len; index++) {
# if HOST_LONG_BITS == 64
dst[index] = bswap64(src[index]);
# else
dst[index] = bswap32(src[index]);
# endif
}
#else
memcpy(dst, src, len * sizeof(unsigned long));
#endif
}
void bitmap_from_le(unsigned long *dst, const unsigned long *src,
long nbits)
{
bitmap_to_from_le(dst, src, nbits);
}
void bitmap_to_le(unsigned long *dst, const unsigned long *src,
long nbits)
{
bitmap_to_from_le(dst, src, nbits);
}

View file

@ -14,15 +14,13 @@
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
/*
* Find the next set bit in a memory region.
*/
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
@ -87,7 +85,7 @@ found_middle:
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;