From 4f0fae7f2bbd09f7a70f678e8db1763d12e5d896 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 24 Jul 2017 12:42:02 +0200 Subject: [PATCH 01/18] migration: Create migration_ioc_process_incoming() We pass the ioc instead of the fd. This will allow us to have more than one channel open. We also make sure that we set the from_src_file sooner, so we don't need to pass it as a parameter. Signed-off-by: Juan Quintela Reviewed-by: Daniel P. Berrange -- Do not assing mis->from_src_file (peterxu) --- migration/channel.c | 3 +-- migration/migration.c | 22 ++++++++++++++++++---- migration/migration.h | 2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/migration/channel.c b/migration/channel.c index 3b7252f5a2..edceebdb7b 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -36,8 +36,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) error_report_err(local_err); } } else { - QEMUFile *f = qemu_fopen_channel_input(ioc); - migration_fd_process_incoming(f); + migration_ioc_process_incoming(ioc); } } diff --git a/migration/migration.c b/migration/migration.c index 959e8ec88e..2d4c56e612 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -306,17 +306,16 @@ static void process_incoming_migration_bh(void *opaque) static void process_incoming_migration_co(void *opaque) { - QEMUFile *f = opaque; MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyState ps; int ret; - mis->from_src_file = f; + assert(mis->from_src_file); mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_ACTIVE); - ret = qemu_loadvm_state(f); + ret = qemu_loadvm_state(mis->from_src_file); ps = postcopy_state_get(); trace_process_incoming_migration_co_end(ret, ps); @@ -364,12 +363,27 @@ static void process_incoming_migration_co(void *opaque) void migration_fd_process_incoming(QEMUFile *f) { - Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f); + Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); + MigrationIncomingState *mis = migration_incoming_get_current(); + if (!mis->from_src_file) { + mis->from_src_file = f; + } qemu_file_set_blocking(f, false); qemu_coroutine_enter(co); } +void migration_ioc_process_incoming(QIOChannel *ioc) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!mis->from_src_file) { + QEMUFile *f = qemu_fopen_channel_input(ioc); + migration_fd_process_incoming(f); + } + /* We still only have a single channel. Nothing to do here yet */ +} + /* * Send a 'SHUT' message on the return channel with the given value * to indicate that we've finished with the RP. Non-0 value indicates diff --git a/migration/migration.h b/migration/migration.h index 148c9facbc..99c398d484 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -20,6 +20,7 @@ #include "exec/cpu-common.h" #include "qemu/coroutine_int.h" #include "hw/qdev.h" +#include "io/channel.h" /* State for the incoming migration */ struct MigrationIncomingState { @@ -152,6 +153,7 @@ struct MigrationState void migrate_set_state(int *state, int old_state, int new_state); void migration_fd_process_incoming(QEMUFile *f); +void migration_ioc_process_incoming(QIOChannel *ioc); uint64_t migrate_max_downtime(void); From 2a543bfdfadb432d82bab256a9a72b00c331b484 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 24 Jul 2017 12:51:59 +0200 Subject: [PATCH 02/18] migration: Teach it about G_SOURCE_REMOVE As this is defined on glib 2.32, add compatibility macros for older glibs. Signed-off-by: Juan Quintela Reviewed-by: Daniel P. Berrange Reviewed-by: Peter Xu --- include/glib-compat.h | 2 ++ migration/exec.c | 2 +- migration/fd.c | 2 +- migration/socket.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/glib-compat.h b/include/glib-compat.h index fcffcd3f07..e15aca2d40 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -223,6 +223,8 @@ static inline gboolean g_hash_table_contains(GHashTable *hash_table, { return g_hash_table_lookup_extended(hash_table, key, NULL, NULL); } +#define G_SOURCE_CONTINUE TRUE +#define G_SOURCE_REMOVE FALSE #endif #ifndef g_assert_true diff --git a/migration/exec.c b/migration/exec.c index 08b599e0e2..f3be1baf2e 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -49,7 +49,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc, { migration_channel_process_incoming(ioc); object_unref(OBJECT(ioc)); - return FALSE; /* unregister */ + return G_SOURCE_REMOVE; } void exec_start_incoming_migration(const char *command, Error **errp) diff --git a/migration/fd.c b/migration/fd.c index 30f5258a6a..30de4b9847 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -49,7 +49,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc, { migration_channel_process_incoming(ioc); object_unref(OBJECT(ioc)); - return FALSE; /* unregister */ + return G_SOURCE_REMOVE; } void fd_start_incoming_migration(const char *infd, Error **errp) diff --git a/migration/socket.c b/migration/socket.c index 757d3821a1..b02d37d7a3 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -154,7 +154,7 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc, out: /* Close listening socket as its no longer needed */ qio_channel_close(ioc, NULL); - return FALSE; /* unregister */ + return G_SOURCE_REMOVE; } From 8e1a1931ca9bcb09b9db1180c9c08ecd5cc6b4de Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 24 Jul 2017 12:55:26 +0200 Subject: [PATCH 03/18] migration: Add comments to channel functions Signed-off-by: Juan Quintela Reviewed-by: Daniel P. Berrange --- migration/channel.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/migration/channel.c b/migration/channel.c index edceebdb7b..70ec7ea3b7 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -19,6 +19,14 @@ #include "qapi/error.h" #include "io/channel-tls.h" +/** + * @migration_channel_process_incoming - Create new incoming migration channel + * + * Notice that TLS is special. For it we listen in a listener socket, + * and then create a new client socket from the TLS library. + * + * @ioc: Channel to which we are connecting + */ void migration_channel_process_incoming(QIOChannel *ioc) { MigrationState *s = migrate_get_current(); @@ -41,6 +49,13 @@ void migration_channel_process_incoming(QIOChannel *ioc) } +/** + * @migration_channel_connect - Create new outgoing migration channel + * + * @s: Current migration state + * @ioc: Channel to which we are connecting + * @hostname: Where we want to connect + */ void migration_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname) From 428d89084c709e568f9cd301c2f6416a54c53d6d Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 24 Jul 2017 13:06:25 +0200 Subject: [PATCH 04/18] migration: Create migration_has_all_channels This function allows us to decide when to close the listener socket. For now, we only need one connection. Signed-off-by: Juan Quintela Reviewed-by: Daniel P. Berrange --- migration/migration.c | 11 +++++++++++ migration/migration.h | 2 ++ migration/socket.c | 10 +++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 2d4c56e612..bac4a99277 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -384,6 +384,17 @@ void migration_ioc_process_incoming(QIOChannel *ioc) /* We still only have a single channel. Nothing to do here yet */ } +/** + * @migration_has_all_channels: We have received all channels that we need + * + * Returns true when we have got connections to all the channels that + * we need for migration. + */ +bool migration_has_all_channels(void) +{ + return true; +} + /* * Send a 'SHUT' message on the return channel with the given value * to indicate that we've finished with the RP. Non-0 value indicates diff --git a/migration/migration.h b/migration/migration.h index 99c398d484..1881e4a754 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -155,6 +155,8 @@ void migrate_set_state(int *state, int old_state, int new_state); void migration_fd_process_incoming(QEMUFile *f); void migration_ioc_process_incoming(QIOChannel *ioc); +bool migration_has_all_channels(void); + uint64_t migrate_max_downtime(void); void migrate_fd_error(MigrationState *s, const Error *error); diff --git a/migration/socket.c b/migration/socket.c index b02d37d7a3..dee869044a 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -152,9 +152,13 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc, object_unref(OBJECT(sioc)); out: - /* Close listening socket as its no longer needed */ - qio_channel_close(ioc, NULL); - return G_SOURCE_REMOVE; + if (migration_has_all_channels()) { + /* Close listening socket as its no longer needed */ + qio_channel_close(ioc, NULL); + return G_SOURCE_REMOVE; + } else { + return G_SOURCE_CONTINUE; + } } From 30126bbf1f7fcad0bf4c65b01a21ff22a36a9759 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 14 Jan 2016 12:23:00 +0100 Subject: [PATCH 05/18] migration: Add multifd capability Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Daniel P. Berrange -- Use new DEFINE_PROP --- migration/migration.c | 10 ++++++++++ migration/migration.h | 1 + qapi/migration.json | 4 +++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index bac4a99277..958b783bcf 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1449,6 +1449,15 @@ bool migrate_use_events(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; } +bool migrate_use_multifd(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD]; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -2227,6 +2236,7 @@ static Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD), DEFINE_PROP_END_OF_LIST(), }; diff --git a/migration/migration.h b/migration/migration.h index 1881e4a754..b7437f16ce 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -174,6 +174,7 @@ bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); bool migrate_auto_converge(void); +bool migrate_use_multifd(void); int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); diff --git a/qapi/migration.json b/qapi/migration.json index ee2b3b8733..ec4a88a43a 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -341,12 +341,14 @@ # @return-path: If enabled, migration will use the return path even # for precopy. (since 2.10) # +# @x-multifd: Use more than one fd for migration (since 2.11) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', - 'block', 'return-path' ] } + 'block', 'return-path', 'x-multifd' ] } ## # @MigrationCapabilityStatus: From 4075fb1ca4ed673ff93d09936da014c1d2c6d2ca Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 15 Jan 2016 08:56:17 +0100 Subject: [PATCH 06/18] migration: Create x-multifd-channels parameter Indicates the number of channels that we will create. By default we create 2 channels. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu -- Catch inconsistent defaults (eric). Improve comment stating that number of threads is the same than number of sockets Use new DEFIN_PROP_* Rename x-multifd-threads to x-multifd-threads --- hmp.c | 7 +++++++ migration/migration.c | 26 ++++++++++++++++++++++++++ migration/migration.h | 1 + qapi/migration.json | 24 +++++++++++++++++++++--- 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/hmp.c b/hmp.c index 0fb2bc7043..bebe19ebe4 100644 --- a/hmp.c +++ b/hmp.c @@ -336,6 +336,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %s\n", MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL), params->block_incremental ? "on" : "off"); + monitor_printf(mon, "%s: %" PRId64 "\n", + MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_CHANNELS), + params->x_multifd_channels); } qapi_free_MigrationParameters(params); @@ -1621,6 +1624,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_block_incremental = true; visit_type_bool(v, param, &p->block_incremental, &err); break; + case MIGRATION_PARAMETER_X_MULTIFD_CHANNELS: + p->has_x_multifd_channels = true; + visit_type_int(v, param, &p->x_multifd_channels, &err); + break; default: assert(0); } diff --git a/migration/migration.c b/migration/migration.c index 958b783bcf..7e79310d03 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -77,6 +77,7 @@ * Note: Please change this default value to 10000 when we support hybrid mode. */ #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 +#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -481,6 +482,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; params->has_block_incremental = true; params->block_incremental = s->parameters.block_incremental; + params->has_x_multifd_channels = true; + params->x_multifd_channels = s->parameters.x_multifd_channels; return params; } @@ -762,6 +765,13 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) "is invalid, it should be positive"); return false; } + if (params->has_x_multifd_channels && + (params->x_multifd_channels < 1 || params->x_multifd_channels > 255)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "multifd_channels", + "is invalid, it should be in the range of 1 to 255"); + return false; + } return true; } @@ -880,6 +890,9 @@ static void migrate_params_apply(MigrateSetParameters *params) if (params->has_block_incremental) { s->parameters.block_incremental = params->block_incremental; } + if (params->has_x_multifd_channels) { + s->parameters.x_multifd_channels = params->x_multifd_channels; + } } void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) @@ -1458,6 +1471,15 @@ bool migrate_use_multifd(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD]; } +int migrate_multifd_channels(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.x_multifd_channels; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -2223,6 +2245,9 @@ static Property migration_properties[] = { DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState, parameters.x_checkpoint_delay, DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), + DEFINE_PROP_INT64("x-multifd-channels", MigrationState, + parameters.x_multifd_channels, + DEFAULT_MIGRATE_MULTIFD_CHANNELS), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -2280,6 +2305,7 @@ static void migration_instance_init(Object *obj) params->has_downtime_limit = true; params->has_x_checkpoint_delay = true; params->has_block_incremental = true; + params->has_x_multifd_channels = true; } /* diff --git a/migration/migration.h b/migration/migration.h index b7437f16ce..3060cbc374 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -175,6 +175,7 @@ bool migrate_zero_blocks(void); bool migrate_auto_converge(void); bool migrate_use_multifd(void); +int migrate_multifd_channels(void); int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); diff --git a/qapi/migration.json b/qapi/migration.json index ec4a88a43a..c766fb1e52 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -466,13 +466,19 @@ # migrated and the destination must already have access to the # same backing chain as was used on the source. (since 2.10) # +# @x-multifd-channels: Number of channels used to migrate data in +# parallel. This is the same number that the +# number of sockets used for migration. The +# default value is 2 (since 2.11) +# # Since: 2.4 ## { 'enum': 'MigrationParameter', 'data': ['compress-level', 'compress-threads', 'decompress-threads', 'cpu-throttle-initial', 'cpu-throttle-increment', 'tls-creds', 'tls-hostname', 'max-bandwidth', - 'downtime-limit', 'x-checkpoint-delay', 'block-incremental' ] } + 'downtime-limit', 'x-checkpoint-delay', 'block-incremental', + 'x-multifd-channels'] } ## # @MigrateSetParameters: @@ -528,6 +534,11 @@ # migrated and the destination must already have access to the # same backing chain as was used on the source. (since 2.10) # +# @x-multifd-channels: Number of channels used to migrate data in +# parallel. This is the same number that the +# number of sockets used for migration. The +# default value is 2 (since 2.11) +# # Since: 2.4 ## # TODO either fuse back into MigrationParameters, or make @@ -543,7 +554,8 @@ '*max-bandwidth': 'int', '*downtime-limit': 'int', '*x-checkpoint-delay': 'int', - '*block-incremental': 'bool' } } + '*block-incremental': 'bool', + '*x-multifd-channels': 'int' } } ## # @migrate-set-parameters: @@ -614,6 +626,11 @@ # migrated and the destination must already have access to the # same backing chain as was used on the source. (since 2.10) # +# @x-multifd-channels: Number of channels used to migrate data in +# parallel. This is the same number that the +# number of sockets used for migration. +# The default value is 2 (since 2.11) +# # Since: 2.4 ## { 'struct': 'MigrationParameters', @@ -627,7 +644,8 @@ '*max-bandwidth': 'int', '*downtime-limit': 'int', '*x-checkpoint-delay': 'int', - '*block-incremental': 'bool' } } + '*block-incremental': 'bool' , + '*x-multifd-channels': 'int' } } ## # @query-migrate-parameters: From 0fb86605eac50d488b1a8d4a9d6986defc3adca9 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 27 Apr 2017 10:48:25 +0200 Subject: [PATCH 07/18] migration: Create x-multifd-page-count parameter Indicates how many pages we are going to send in each batch to a multifd thread. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu -- Be consistent with defaults and documentation Use new DEFINE_PROP_* Rename x-multifd-group to x-multifd-page-count --- hmp.c | 7 +++++++ migration/migration.c | 27 +++++++++++++++++++++++++++ migration/migration.h | 1 + qapi/migration.json | 17 ++++++++++++++--- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/hmp.c b/hmp.c index bebe19ebe4..ace729d03f 100644 --- a/hmp.c +++ b/hmp.c @@ -339,6 +339,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_CHANNELS), params->x_multifd_channels); + monitor_printf(mon, "%s: %" PRId64 "\n", + MigrationParameter_str(MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT), + params->x_multifd_page_count); } qapi_free_MigrationParameters(params); @@ -1628,6 +1631,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_x_multifd_channels = true; visit_type_int(v, param, &p->x_multifd_channels, &err); break; + case MIGRATION_PARAMETER_X_MULTIFD_PAGE_COUNT: + p->has_x_multifd_page_count = true; + visit_type_int(v, param, &p->x_multifd_page_count, &err); + break; default: assert(0); } diff --git a/migration/migration.c b/migration/migration.c index 7e79310d03..494c6eb435 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -78,6 +78,7 @@ */ #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -484,6 +485,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->block_incremental = s->parameters.block_incremental; params->has_x_multifd_channels = true; params->x_multifd_channels = s->parameters.x_multifd_channels; + params->has_x_multifd_page_count = true; + params->x_multifd_page_count = s->parameters.x_multifd_page_count; return params; } @@ -772,6 +775,14 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) "is invalid, it should be in the range of 1 to 255"); return false; } + if (params->has_x_multifd_page_count && + (params->x_multifd_page_count < 1 || + params->x_multifd_page_count > 10000)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "multifd_page_count", + "is invalid, it should be in the range of 1 to 10000"); + return false; + } return true; } @@ -893,6 +904,9 @@ static void migrate_params_apply(MigrateSetParameters *params) if (params->has_x_multifd_channels) { s->parameters.x_multifd_channels = params->x_multifd_channels; } + if (params->has_x_multifd_page_count) { + s->parameters.x_multifd_page_count = params->x_multifd_page_count; + } } void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) @@ -1480,6 +1494,15 @@ int migrate_multifd_channels(void) return s->parameters.x_multifd_channels; } +int migrate_multifd_page_count(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.x_multifd_page_count; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -2248,6 +2271,9 @@ static Property migration_properties[] = { DEFINE_PROP_INT64("x-multifd-channels", MigrationState, parameters.x_multifd_channels, DEFAULT_MIGRATE_MULTIFD_CHANNELS), + DEFINE_PROP_INT64("x-multifd-page-count", MigrationState, + parameters.x_multifd_page_count, + DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -2306,6 +2332,7 @@ static void migration_instance_init(Object *obj) params->has_x_checkpoint_delay = true; params->has_block_incremental = true; params->has_x_multifd_channels = true; + params->has_x_multifd_page_count = true; } /* diff --git a/migration/migration.h b/migration/migration.h index 3060cbc374..641290f51b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -176,6 +176,7 @@ bool migrate_zero_blocks(void); bool migrate_auto_converge(void); bool migrate_use_multifd(void); int migrate_multifd_channels(void); +int migrate_multifd_page_count(void); int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); diff --git a/qapi/migration.json b/qapi/migration.json index c766fb1e52..f8b365e3f5 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -471,6 +471,9 @@ # number of sockets used for migration. The # default value is 2 (since 2.11) # +# @x-multifd-page-count: Number of pages sent together to a thread +# The default value is 16 (since 2.11) +# # Since: 2.4 ## { 'enum': 'MigrationParameter', @@ -478,7 +481,7 @@ 'cpu-throttle-initial', 'cpu-throttle-increment', 'tls-creds', 'tls-hostname', 'max-bandwidth', 'downtime-limit', 'x-checkpoint-delay', 'block-incremental', - 'x-multifd-channels'] } + 'x-multifd-channels', 'x-multifd-page-count' ] } ## # @MigrateSetParameters: @@ -539,6 +542,9 @@ # number of sockets used for migration. The # default value is 2 (since 2.11) # +# @x-multifd-page-count: Number of pages sent together to a thread +# The default value is 16 (since 2.11) +# # Since: 2.4 ## # TODO either fuse back into MigrationParameters, or make @@ -555,7 +561,8 @@ '*downtime-limit': 'int', '*x-checkpoint-delay': 'int', '*block-incremental': 'bool', - '*x-multifd-channels': 'int' } } + '*x-multifd-channels': 'int', + '*x-multifd-page-count': 'int' } } ## # @migrate-set-parameters: @@ -631,6 +638,9 @@ # number of sockets used for migration. # The default value is 2 (since 2.11) # +# @x-multifd-page-count: Number of pages sent together to a thread +# The default value is 16 (since 2.11) +# # Since: 2.4 ## { 'struct': 'MigrationParameters', @@ -645,7 +655,8 @@ '*downtime-limit': 'int', '*x-checkpoint-delay': 'int', '*block-incremental': 'bool' , - '*x-multifd-channels': 'int' } } + '*x-multifd-channels': 'int', + '*x-multifd-page-count': 'int' } } ## # @query-migrate-parameters: From f986c3d2569c6aa5cd03a6b8bbb5371e4b608d3c Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 14 Jan 2016 16:52:55 +0100 Subject: [PATCH 08/18] migration: Create multifd migration threads Creation of the threads, nothing inside yet. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert -- Use pointers instead of long array names Move to use semaphores instead of conditions as paolo suggestion Put all the state inside one struct. Use a counter for the number of threads created. Needed during cancellation. Add error return to thread creation Add id field Rename functions to multifd_save/load_setup/cleanup Change recv parameters to a pointer to struct Change back to a struct Use Error * for _cleanup --- migration/migration.c | 26 ++++++ migration/ram.c | 202 ++++++++++++++++++++++++++++++++++++++++++ migration/ram.h | 5 ++ 3 files changed, 233 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 494c6eb435..336da038f5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -281,6 +281,10 @@ static void process_incoming_migration_bh(void *opaque) */ qemu_announce_self(); + if (multifd_load_cleanup(&local_err) != 0) { + error_report_err(local_err); + autostart = false; + } /* If global state section was not received or we are in running state, we need to obey autostart. Any other state is set with runstate_set. */ @@ -353,10 +357,15 @@ static void process_incoming_migration_co(void *opaque) } if (ret < 0) { + Error *local_err = NULL; + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); error_report("load of migration failed: %s", strerror(-ret)); qemu_fclose(mis->from_src_file); + if (multifd_load_cleanup(&local_err) != 0) { + error_report_err(local_err); + } exit(EXIT_FAILURE); } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); @@ -368,6 +377,12 @@ void migration_fd_process_incoming(QEMUFile *f) Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); MigrationIncomingState *mis = migration_incoming_get_current(); + if (multifd_load_setup() != 0) { + /* We haven't been able to create multifd threads + nothing better to do */ + exit(EXIT_FAILURE); + } + if (!mis->from_src_file) { mis->from_src_file = f; } @@ -1020,6 +1035,8 @@ static void migrate_fd_cleanup(void *opaque) s->cleanup_bh = NULL; if (s->to_dst_file) { + Error *local_err = NULL; + trace_migrate_fd_cleanup(); qemu_mutex_unlock_iothread(); if (s->migration_thread_running) { @@ -1028,6 +1045,9 @@ static void migrate_fd_cleanup(void *opaque) } qemu_mutex_lock_iothread(); + if (multifd_save_cleanup(&local_err) != 0) { + error_report_err(local_err); + } qemu_fclose(s->to_dst_file); s->to_dst_file = NULL; } @@ -2217,6 +2237,12 @@ void migrate_fd_connect(MigrationState *s) } } + if (multifd_save_setup() != 0) { + migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, + MIGRATION_STATUS_FAILED); + migrate_fd_cleanup(s); + return; + } qemu_thread_create(&s->thread, "live_migration", migration_thread, s, QEMU_THREAD_JOINABLE); s->migration_thread_running = true; diff --git a/migration/ram.c b/migration/ram.c index e18b3e2d4f..bb369e72d4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -356,6 +356,208 @@ static void compress_threads_save_setup(void) } } +/* Multiple fd's */ + +struct MultiFDSendParams { + uint8_t id; + char *name; + QemuThread thread; + QemuSemaphore sem; + QemuMutex mutex; + bool quit; +}; +typedef struct MultiFDSendParams MultiFDSendParams; + +struct { + MultiFDSendParams *params; + /* number of created threads */ + int count; +} *multifd_send_state; + +static void terminate_multifd_send_threads(Error *errp) +{ + int i; + + for (i = 0; i < multifd_send_state->count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + qemu_mutex_lock(&p->mutex); + p->quit = true; + qemu_sem_post(&p->sem); + qemu_mutex_unlock(&p->mutex); + } +} + +int multifd_save_cleanup(Error **errp) +{ + int i; + int ret = 0; + + if (!migrate_use_multifd()) { + return 0; + } + terminate_multifd_send_threads(NULL); + for (i = 0; i < multifd_send_state->count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + qemu_thread_join(&p->thread); + qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); + g_free(p->name); + p->name = NULL; + } + g_free(multifd_send_state->params); + multifd_send_state->params = NULL; + g_free(multifd_send_state); + multifd_send_state = NULL; + return ret; +} + +static void *multifd_send_thread(void *opaque) +{ + MultiFDSendParams *p = opaque; + + while (true) { + qemu_mutex_lock(&p->mutex); + if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; + } + qemu_mutex_unlock(&p->mutex); + qemu_sem_wait(&p->sem); + } + + return NULL; +} + +int multifd_save_setup(void) +{ + int thread_count; + uint8_t i; + + if (!migrate_use_multifd()) { + return 0; + } + thread_count = migrate_multifd_channels(); + multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->count = 0; + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + p->quit = false; + p->id = i; + p->name = g_strdup_printf("multifdsend_%d", i); + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + + multifd_send_state->count++; + } + return 0; +} + +struct MultiFDRecvParams { + uint8_t id; + char *name; + QemuThread thread; + QemuSemaphore sem; + QemuMutex mutex; + bool quit; +}; +typedef struct MultiFDRecvParams MultiFDRecvParams; + +struct { + MultiFDRecvParams *params; + /* number of created threads */ + int count; +} *multifd_recv_state; + +static void terminate_multifd_recv_threads(Error *errp) +{ + int i; + + for (i = 0; i < multifd_recv_state->count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + qemu_mutex_lock(&p->mutex); + p->quit = true; + qemu_sem_post(&p->sem); + qemu_mutex_unlock(&p->mutex); + } +} + +int multifd_load_cleanup(Error **errp) +{ + int i; + int ret = 0; + + if (!migrate_use_multifd()) { + return 0; + } + terminate_multifd_recv_threads(NULL); + for (i = 0; i < multifd_recv_state->count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + qemu_thread_join(&p->thread); + qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); + g_free(p->name); + p->name = NULL; + } + g_free(multifd_recv_state->params); + multifd_recv_state->params = NULL; + g_free(multifd_recv_state); + multifd_recv_state = NULL; + + return ret; +} + +static void *multifd_recv_thread(void *opaque) +{ + MultiFDRecvParams *p = opaque; + + while (true) { + qemu_mutex_lock(&p->mutex); + if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; + } + qemu_mutex_unlock(&p->mutex); + qemu_sem_wait(&p->sem); + } + + return NULL; +} + +int multifd_load_setup(void) +{ + int thread_count; + uint8_t i; + + if (!migrate_use_multifd()) { + return 0; + } + thread_count = migrate_multifd_channels(); + multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); + multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); + multifd_recv_state->count = 0; + for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + p->quit = false; + p->id = i; + p->name = g_strdup_printf("multifdrecv_%d", i); + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + multifd_recv_state->count++; + } + return 0; +} + /** * save_page_header: write page header to wire * diff --git a/migration/ram.h b/migration/ram.h index c081fde86c..4a72d66503 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -39,6 +39,11 @@ int64_t xbzrle_cache_resize(int64_t new_size); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); +int multifd_save_setup(void); +int multifd_save_cleanup(Error **errp); +int multifd_load_setup(void); +int multifd_load_cleanup(Error **errp); + uint64_t ram_pagesize_summary(void); int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); void acct_update_position(QEMUFile *f, size_t size, bool zero); From e595a01ab656192b11254b3dafcc83b0adb18470 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 17 Jul 2017 12:30:25 +0200 Subject: [PATCH 09/18] migration: Split migration_fd_process_incoming We need that on later patches. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Daniel P. Berrange --- migration/migration.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 336da038f5..067dd929c4 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -372,9 +372,8 @@ static void process_incoming_migration_co(void *opaque) qemu_bh_schedule(mis->bh); } -void migration_fd_process_incoming(QEMUFile *f) +static void migration_incoming_setup(QEMUFile *f) { - Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); MigrationIncomingState *mis = migration_incoming_get_current(); if (multifd_load_setup() != 0) { @@ -387,9 +386,20 @@ void migration_fd_process_incoming(QEMUFile *f) mis->from_src_file = f; } qemu_file_set_blocking(f, false); +} + +static void migration_incoming_process(void) +{ + Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); qemu_coroutine_enter(co); } +void migration_fd_process_incoming(QEMUFile *f) +{ + migration_incoming_setup(f); + migration_incoming_process(); +} + void migration_ioc_process_incoming(QIOChannel *ioc) { MigrationIncomingState *mis = migration_incoming_get_current(); From ab089e058eb443a9a11ade4d3fc57ced1947bfa3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Aug 2017 16:31:58 +0800 Subject: [PATCH 10/18] bitmap: remove BITOP_WORD() We have BIT_WORD(). It's the same. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- util/bitops.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/util/bitops.c b/util/bitops.c index b0c35dd5f1..f2364015c4 100644 --- a/util/bitops.c +++ b/util/bitops.c @@ -14,15 +14,13 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - /* * Find the next set bit in a memory region. */ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; @@ -87,7 +85,7 @@ found_middle: unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); + const unsigned long *p = addr + BIT_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; From fc7deeea26af3d08f45bad85b8bd3fc3d790a090 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Aug 2017 16:31:59 +0800 Subject: [PATCH 11/18] bitmap: introduce bitmap_count_one() Count how many bits set in the bitmap. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- include/qemu/bitmap.h | 10 ++++++++++ util/bitmap.c | 15 +++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index c318da12d7..2718706edb 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -82,6 +82,7 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); int slow_bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits); static inline unsigned long *bitmap_try_new(long nbits) { @@ -216,6 +217,15 @@ static inline int bitmap_intersects(const unsigned long *src1, } } +static inline long bitmap_count_one(const unsigned long *bitmap, long nbits) +{ + if (small_nbits(nbits)) { + return ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits)); + } else { + return slow_bitmap_count_one(bitmap, nbits); + } +} + void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); diff --git a/util/bitmap.c b/util/bitmap.c index efced9a7d8..90a42ff625 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -355,3 +355,18 @@ int slow_bitmap_intersects(const unsigned long *bitmap1, } return 0; } + +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits) +{ + long k, lim = nbits / BITS_PER_LONG, result = 0; + + for (k = 0; k < lim; k++) { + result += ctpopl(bitmap[k]); + } + + if (nbits % BITS_PER_LONG) { + result += ctpopl(bitmap[k] & BITMAP_LAST_WORD_MASK(nbits)); + } + + return result; +} From d7788151a0807d5d2d410e3f8944d8c8a651f8d2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Aug 2017 16:32:00 +0800 Subject: [PATCH 12/18] bitmap: provide to_le/from_le helpers Provide helpers to convert bitmaps to little endian format. It can be used when we want to send one bitmap via network to some other hosts. One thing to mention is that, these helpers only solve the problem of endianess, but it does not solve the problem of different word size on machines (the bitmaps managing same count of bits may contains different size when malloced). So we need to take care of the size alignment issue on the callers for now. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- include/qemu/bitmap.h | 7 +++++++ util/bitmap.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 2718706edb..509eeddece 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -39,6 +39,8 @@ * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_test_and_clear_atomic(dst, pos, nbits) Test and clear area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area + * bitmap_to_le(dst, src, nbits) Convert bitmap to little endian + * bitmap_from_le(dst, src, nbits) Convert bitmap from little endian */ /* @@ -247,4 +249,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old, return new; } +void bitmap_to_le(unsigned long *dst, const unsigned long *src, + long nbits); +void bitmap_from_le(unsigned long *dst, const unsigned long *src, + long nbits); + #endif /* BITMAP_H */ diff --git a/util/bitmap.c b/util/bitmap.c index 90a42ff625..cb618c65a5 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -370,3 +370,35 @@ long slow_bitmap_count_one(const unsigned long *bitmap, long nbits) return result; } + +static void bitmap_to_from_le(unsigned long *dst, + const unsigned long *src, long nbits) +{ + long len = BITS_TO_LONGS(nbits); + +#ifdef HOST_WORDS_BIGENDIAN + long index; + + for (index = 0; index < len; index++) { +# if HOST_LONG_BITS == 64 + dst[index] = bswap64(src[index]); +# else + dst[index] = bswap32(src[index]); +# endif + } +#else + memcpy(dst, src, len * sizeof(unsigned long)); +#endif +} + +void bitmap_from_le(unsigned long *dst, const unsigned long *src, + long nbits) +{ + bitmap_to_from_le(dst, src, nbits); +} + +void bitmap_to_le(unsigned long *dst, const unsigned long *src, + long nbits) +{ + bitmap_to_from_le(dst, src, nbits); +} From c6467627369b2518ea3cf466da6cd39da7e3a85a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 10 Jul 2017 19:30:14 +0300 Subject: [PATCH 13/18] migration: add has_postcopy savevm handler Now postcopy-able states are recognized by not NULL save_live_complete_postcopy handler. But when we have several different postcopy-able states, it is not convenient. Ram postcopy may be disabled, while some other postcopy enabled, in this case Ram state should behave as it is not postcopy-able. This patch add separate has_postcopy handler to specify behaviour of savevm state. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- include/migration/register.h | 1 + migration/ram.c | 6 ++++++ migration/savevm.c | 6 ++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/migration/register.h b/include/migration/register.h index a0f1edd8c7..f4f7bdc177 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -24,6 +24,7 @@ typedef struct SaveVMHandlers { /* This runs both outside and inside the iothread lock. */ bool (*is_active)(void *opaque); + bool (*has_postcopy)(void *opaque); /* This runs outside the iothread lock in the migration case, and * within the lock in the savevm case. The callback had better only diff --git a/migration/ram.c b/migration/ram.c index bb369e72d4..cedbeae48d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2849,11 +2849,17 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) return ret; } +static bool ram_has_postcopy(void *opaque) +{ + return migrate_postcopy_ram(); +} + static SaveVMHandlers savevm_ram_handlers = { .save_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, .save_live_complete_postcopy = ram_save_complete, .save_live_complete_precopy = ram_save_complete, + .has_postcopy = ram_has_postcopy, .save_live_pending = ram_save_pending, .load_state = ram_load, .save_cleanup = ram_save_cleanup, diff --git a/migration/savevm.c b/migration/savevm.c index 7a55023d1a..9a48b7b4cb 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1008,7 +1008,8 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) * call that's already run, it might get confused if we call * iterate afterwards. */ - if (postcopy && !se->ops->save_live_complete_postcopy) { + if (postcopy && + !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) { continue; } if (qemu_file_rate_limit(f)) { @@ -1097,7 +1098,8 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || - (in_postcopy && se->ops->save_live_complete_postcopy) || + (in_postcopy && se->ops->has_postcopy && + se->ops->has_postcopy(se->opaque)) || (in_postcopy && !iterable_only) || !se->ops->save_live_complete_precopy) { continue; From 86e1167e9ad3a34ab00ed412fc67e1d02ec0ca7b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 10 Jul 2017 19:30:15 +0300 Subject: [PATCH 14/18] migration: fix ram_save_pending Fill postcopy-able pending only if ram postcopy is enabled. It is necessary because of there will be other postcopy-able states and when ram postcopy is disabled, it should not spoil common postcopy related pending. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index cedbeae48d..88ca69e7b2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2276,8 +2276,12 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } - /* We can do postcopy, and all the data is postcopiable */ - *postcopiable_pending += remaining_size; + if (migrate_postcopy_ram()) { + /* We can do postcopy, and all the data is postcopiable */ + *postcopiable_pending += remaining_size; + } else { + *non_postcopiable_pending += remaining_size; + } } static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) From 58110f0acb1a33e2bc60a2f1b26d2690a92e8a14 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 10 Jul 2017 19:30:16 +0300 Subject: [PATCH 15/18] migration: split common postcopy out of ram postcopy Split common postcopy staff from ram postcopy staff. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 39 +++++++++++++++++++++++------------ migration/migration.h | 2 ++ migration/savevm.c | 48 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 067dd929c4..266cd39c36 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1443,6 +1443,11 @@ bool migrate_postcopy_ram(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; } +bool migrate_postcopy(void) +{ + return migrate_postcopy_ram(); +} + bool migrate_auto_converge(void) { MigrationState *s; @@ -1826,9 +1831,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) * need to tell the destination to throw any pages it's already received * that are dirty */ - if (ram_postcopy_send_discard_bitmap(ms)) { - error_report("postcopy send discard bitmap failed"); - goto fail; + if (migrate_postcopy_ram()) { + if (ram_postcopy_send_discard_bitmap(ms)) { + error_report("postcopy send discard bitmap failed"); + goto fail; + } } /* @@ -1837,8 +1844,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) * wrap their state up here */ qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); - /* Ping just for debugging, helps line traces up */ - qemu_savevm_send_ping(ms->to_dst_file, 2); + if (migrate_postcopy_ram()) { + /* Ping just for debugging, helps line traces up */ + qemu_savevm_send_ping(ms->to_dst_file, 2); + } /* * While loading the device state we may trigger page transfer @@ -1863,7 +1872,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) qemu_savevm_send_postcopy_listen(fb); qemu_savevm_state_complete_precopy(fb, false, false); - qemu_savevm_send_ping(fb, 3); + if (migrate_postcopy_ram()) { + qemu_savevm_send_ping(fb, 3); + } qemu_savevm_send_postcopy_run(fb); @@ -1898,11 +1909,13 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) qemu_mutex_unlock_iothread(); - /* - * Although this ping is just for debug, it could potentially be - * used for getting a better measurement of downtime at the source. - */ - qemu_savevm_send_ping(ms->to_dst_file, 4); + if (migrate_postcopy_ram()) { + /* + * Although this ping is just for debug, it could potentially be + * used for getting a better measurement of downtime at the source. + */ + qemu_savevm_send_ping(ms->to_dst_file, 4); + } if (migrate_release_ram()) { ram_postcopy_migrated_memory_release(ms); @@ -2080,7 +2093,7 @@ static void *migration_thread(void *opaque) qemu_savevm_send_ping(s->to_dst_file, 1); } - if (migrate_postcopy_ram()) { + if (migrate_postcopy()) { /* * Tell the destination that we *might* want to do postcopy later; * if the other end can't do postcopy it should fail now, nice and @@ -2113,7 +2126,7 @@ static void *migration_thread(void *opaque) if (pending_size && pending_size >= threshold_size) { /* Still a significant amount to transfer */ - if (migrate_postcopy_ram() && + if (migrate_postcopy() && s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE && pend_nonpost <= threshold_size && atomic_read(&s->start_postcopy)) { diff --git a/migration/migration.h b/migration/migration.h index 641290f51b..b83cceadc4 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -169,6 +169,8 @@ bool migration_is_blocked(Error **errp); bool migration_in_postcopy(void); MigrationState *migrate_get_current(void); +bool migrate_postcopy(void); + bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); diff --git a/migration/savevm.c b/migration/savevm.c index 9a48b7b4cb..231474da34 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -89,7 +89,7 @@ static struct mig_cmd_args { [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" }, [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" }, [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" }, - [MIG_CMD_POSTCOPY_ADVISE] = { .len = 16, .name = "POSTCOPY_ADVISE" }, + [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" }, [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" }, [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" }, [MIG_CMD_POSTCOPY_RAM_DISCARD] = { @@ -98,6 +98,23 @@ static struct mig_cmd_args { [MIG_CMD_MAX] = { .len = -1, .name = "MAX" }, }; +/* Note for MIG_CMD_POSTCOPY_ADVISE: + * The format of arguments is depending on postcopy mode: + * - postcopy RAM only + * uint64_t host page size + * uint64_t taget page size + * + * - postcopy RAM and postcopy dirty bitmaps + * format is the same as for postcopy RAM only + * + * - postcopy dirty bitmaps only + * Nothing. Command length field is 0. + * + * Be careful: adding a new postcopy entity with some other parameters should + * not break format self-description ability. Good way is to introduce some + * generic extendable format with an exception for two old entities. + */ + static int announce_self_create(uint8_t *buf, uint8_t *mac_addr) { @@ -861,12 +878,17 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) /* Send prior to any postcopy transfer */ void qemu_savevm_send_postcopy_advise(QEMUFile *f) { - uint64_t tmp[2]; - tmp[0] = cpu_to_be64(ram_pagesize_summary()); - tmp[1] = cpu_to_be64(qemu_target_page_size()); + if (migrate_postcopy_ram()) { + uint64_t tmp[2]; + tmp[0] = cpu_to_be64(ram_pagesize_summary()); + tmp[1] = cpu_to_be64(qemu_target_page_size()); - trace_qemu_savevm_send_postcopy_advise(); - qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp); + trace_qemu_savevm_send_postcopy_advise(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, + 16, (uint8_t *)tmp); + } else { + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL); + } } /* Sent prior to starting the destination running in postcopy, discard pages @@ -1354,6 +1376,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } + if (!migrate_postcopy_ram()) { + return 0; + } + if (!postcopy_ram_supported_by_host()) { postcopy_state_set(POSTCOPY_INCOMING_NONE); return -1; @@ -1564,7 +1590,9 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) * A rare case, we entered listen without having to do any discards, * so do the setup that's normally done at the time of the 1st discard. */ - postcopy_ram_prepare_discard(mis); + if (migrate_postcopy_ram()) { + postcopy_ram_prepare_discard(mis); + } } /* @@ -1572,8 +1600,10 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) * However, at this point the CPU shouldn't be running, and the IO * shouldn't be doing anything yet so don't actually expect requests */ - if (postcopy_ram_enable_notify(mis)) { - return -1; + if (migrate_postcopy_ram()) { + if (postcopy_ram_enable_notify(mis)) { + return -1; + } } if (mis->have_listen_thread) { From d7651f150d61936344c4fab45eaeb0716c606af2 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Tue, 19 Sep 2017 19:47:56 +0300 Subject: [PATCH 16/18] migration: pass MigrationIncomingState* into migration check functions That tiny refactoring is necessary to be able to set UFFD_FEATURE_THREAD_ID while requesting features, and then to create downtime context in case when kernel supports it. Signed-off-by: Alexey Perevalov Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 3 ++- migration/postcopy-ram.c | 10 +++++----- migration/postcopy-ram.h | 2 +- migration/savevm.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 266cd39c36..98429dc843 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -659,6 +659,7 @@ static bool migrate_caps_check(bool *cap_list, { MigrationCapabilityStatusList *cap; bool old_postcopy_cap; + MigrationIncomingState *mis = migration_incoming_get_current(); old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; @@ -692,7 +693,7 @@ static bool migrate_caps_check(bool *cap_list, * special support. */ if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && - !postcopy_ram_supported_by_host()) { + !postcopy_ram_supported_by_host(mis)) { /* postcopy_ram_supported_by_host will have emitted a more * detailed message */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 7e21e6fd36..c70305ccc7 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,7 +61,7 @@ struct PostcopyDiscardState { #include #include -static bool ufd_version_check(int ufd) +static bool ufd_version_check(int ufd, MigrationIncomingState *mis) { struct uffdio_api api_struct; uint64_t ioctl_mask; @@ -124,7 +124,7 @@ static int test_ramblock_postcopiable(const char *block_name, void *host_addr, * normally fine since if the postcopy succeeds it gets turned back on at the * end. */ -bool postcopy_ram_supported_by_host(void) +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) { long pagesize = getpagesize(); int ufd = -1; @@ -147,7 +147,7 @@ bool postcopy_ram_supported_by_host(void) } /* Version and features check */ - if (!ufd_version_check(ufd)) { + if (!ufd_version_check(ufd, mis)) { goto out; } @@ -523,7 +523,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Although the host check already tested the API, we need to * do the check again as an ABI handshake on the new fd. */ - if (!ufd_version_check(mis->userfault_fd)) { + if (!ufd_version_check(mis->userfault_fd, mis)) { return -1; } @@ -661,7 +661,7 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis) #else /* No target OS support, stubs just fail */ -bool postcopy_ram_supported_by_host(void) +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) { error_report("%s: No OS support", __func__); return false; diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 52d51e8007..587a8b86a7 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -14,7 +14,7 @@ #define QEMU_POSTCOPY_RAM_H /* Return true if the host supports everything we need to do postcopy-ram */ -bool postcopy_ram_supported_by_host(void); +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); /* * Make all of RAM sensitive to accesses to areas that haven't yet been written diff --git a/migration/savevm.c b/migration/savevm.c index 231474da34..2478352baf 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1380,7 +1380,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return 0; } - if (!postcopy_ram_supported_by_host()) { + if (!postcopy_ram_supported_by_host(mis)) { postcopy_state_set(POSTCOPY_INCOMING_NONE); return -1; } From 5553499f043d4b7fc020600ffda75d1162ff970b Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Tue, 19 Sep 2017 19:47:57 +0300 Subject: [PATCH 17/18] migration: fix hardcoded function name in error report Reviewed-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Alexey Perevalov Signed-off-by: Juan Quintela --- migration/postcopy-ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index c70305ccc7..cf62fc756f 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -69,7 +69,7 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis) api_struct.api = UFFD_API; api_struct.features = 0; if (ioctl(ufd, UFFDIO_API, &api_struct)) { - error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s", + error_report("%s: UFFDIO_API failed: %s", __func__, strerror(errno)); return false; } From 54ae0886b12c4934e84381af777d4df6147cc2ec Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Tue, 19 Sep 2017 19:47:58 +0300 Subject: [PATCH 18/18] migration: split ufd_version_check onto receive/request features part This modification is necessary for userfault fd features which are required to be requested from userspace. UFFD_FEATURE_THREAD_ID is a one of such "on demand" feature, which will be introduced in the next patch. QEMU have to use separate userfault file descriptor, due to userfault context has internal state, and after first call of ioctl UFFD_API it changes its state to UFFD_STATE_RUNNING (in case of success), but kernel while handling ioctl UFFD_API expects UFFD_STATE_WAIT_API. So only one ioctl with UFFD_API is possible per ufd. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Alexey Perevalov Signed-off-by: Juan Quintela --- migration/postcopy-ram.c | 96 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 7 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index cf62fc756f..0de68e8b25 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -61,16 +61,67 @@ struct PostcopyDiscardState { #include #include -static bool ufd_version_check(int ufd, MigrationIncomingState *mis) -{ - struct uffdio_api api_struct; - uint64_t ioctl_mask; +/** + * receive_ufd_features: check userfault fd features, to request only supported + * features in the future. + * + * Returns: true on success + * + * __NR_userfaultfd - should be checked before + * @features: out parameter will contain uffdio_api.features provided by kernel + * in case of success + */ +static bool receive_ufd_features(uint64_t *features) +{ + struct uffdio_api api_struct = {0}; + int ufd; + bool ret = true; + + /* if we are here __NR_userfaultfd should exists */ + ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + if (ufd == -1) { + error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, + strerror(errno)); + return false; + } + + /* ask features */ api_struct.api = UFFD_API; api_struct.features = 0; if (ioctl(ufd, UFFDIO_API, &api_struct)) { error_report("%s: UFFDIO_API failed: %s", __func__, strerror(errno)); + ret = false; + goto release_ufd; + } + + *features = api_struct.features; + +release_ufd: + close(ufd); + return ret; +} + +/** + * request_ufd_features: this function should be called only once on a newly + * opened ufd, subsequent calls will lead to error. + * + * Returns: true on succes + * + * @ufd: fd obtained from userfaultfd syscall + * @features: bit mask see UFFD_API_FEATURES + */ +static bool request_ufd_features(int ufd, uint64_t features) +{ + struct uffdio_api api_struct = {0}; + uint64_t ioctl_mask; + + api_struct.api = UFFD_API; + api_struct.features = features; + if (ioctl(ufd, UFFDIO_API, &api_struct)) { + error_report("%s failed: UFFDIO_API failed: %s", __func__, + strerror(errno)); return false; } @@ -82,11 +133,42 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis) return false; } + return true; +} + +static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) +{ + uint64_t asked_features = 0; + static uint64_t supported_features; + + /* + * it's not possible to + * request UFFD_API twice per one fd + * userfault fd features is persistent + */ + if (!supported_features) { + if (!receive_ufd_features(&supported_features)) { + error_report("%s failed", __func__); + return false; + } + } + + /* + * request features, even if asked_features is 0, due to + * kernel expects UFFD_API before UFFDIO_REGISTER, per + * userfault file descriptor + */ + if (!request_ufd_features(ufd, asked_features)) { + error_report("%s failed: features %" PRIu64, __func__, + asked_features); + return false; + } + if (getpagesize() != ram_pagesize_summary()) { bool have_hp = false; /* We've got a huge page */ #ifdef UFFD_FEATURE_MISSING_HUGETLBFS - have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; + have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; #endif if (!have_hp) { error_report("Userfault on this host does not support huge pages"); @@ -147,7 +229,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) } /* Version and features check */ - if (!ufd_version_check(ufd, mis)) { + if (!ufd_check_and_apply(ufd, mis)) { goto out; } @@ -523,7 +605,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Although the host check already tested the API, we need to * do the check again as an ABI handshake on the new fd. */ - if (!ufd_version_check(mis->userfault_fd, mis)) { + if (!ufd_check_and_apply(mis->userfault_fd, mis)) { return -1; }