migration/next for 20151110
-----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABCAAGBQJWQf3eAAoJEPSH7xhYctcjqnsP/iIvpP9wfHxNCFWy3o9pjeJm s5SEyA+/Rzef0+eoUlETyuWeivZA40lyhYzCRBZfjAshLiAjGl2T+/S+gkwNB4Na IMgYfdQ6EzGzZIUgskqFcdHF0PkYKsFjQN9OnDdkVDG7WW20MEf7UmhgEDMZ2fnA 4o2e/jPcZSF4v4J6/Dl1J6pev50OBwoGclFaVIRA5U3Me9/+0C8U9nodrWvRW1Yp 3bLxA3/Sr8pjApap+gYADuAMq/C85H0nxU1bnUZEdJc5KyLiFC1hqLC7zQS0+FMW 6wdPULWeqf03enFONeiRa2TGlYP0kPFDrdmz8HGQgJ5PgjtlkUdmDK8flTLnoN7z 7yX9C8qF/afe/FjCyCxphEM1NBmu8d/8LjoNpxZOY4AKhm4YVWfRLJCrePBilx3l qLbeIBTjjcq59JYnj0cqIamLRf7U9CvFxb6dVT/ejX8aqvH1a1wNfgMgn5Vh9ICv PmnAHO1gaYthhd76uHASMSE9v/neY6xa8r+f3VP8RveC/SmriAtkTMa/VpL8Bp0B O5ERqQg27RjUbfKidAUcrlC1jb4pWwX48Lh3yo6cSrCUGBiVoESfEbpgCfZQQDnD l8tapPZX14y1wUN5Rn9HjFq11AN0MKGlRaTA5KMzL3eaAExKWwNlAV6tawQnsghQ NOzZfechjlENjpfJJbc2 =tlQv -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20151110' into staging migration/next for 20151110 # gpg: Signature made Tue 10 Nov 2015 14:23:26 GMT using RSA key ID 5872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" # gpg: aka "Juan Quintela <quintela@trasno.org>" * remotes/juanquintela/tags/migration/20151110: (57 commits) migration: qemu_savevm_state_cleanup becomes mandatory operation Inhibit ballooning during postcopy Disable mlock around incoming postcopy End of migration for postcopy Postcopy: Mark nohugepage before discard postcopy: Wire up loadvm_postcopy_handle_ commands Start up a postcopy/listener thread ready for incoming page data Postcopy; Handle userfault requests Round up RAMBlock sizes to host page sizes Host page!=target page: Cleanup bitmaps Don't iterate on precopy-only devices during postcopy Don't sync dirty bitmaps in postcopy postcopy: Check order of received target pages Postcopy: Use helpers to map pages during migration postcopy_ram.c: place_page and helpers Page request: Consume pages off the post-copy queue Page request: Process incoming page request Page request: Add MIG_RP_MSG_REQ_PAGES 
reverse command Postcopy: End of iteration Postcopy: Postcopy startup in migration thread ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
a77067f6ac
11
balloon.c
11
balloon.c
|
@ -36,6 +36,17 @@
|
||||||
static QEMUBalloonEvent *balloon_event_fn;
|
static QEMUBalloonEvent *balloon_event_fn;
|
||||||
static QEMUBalloonStatus *balloon_stat_fn;
|
static QEMUBalloonStatus *balloon_stat_fn;
|
||||||
static void *balloon_opaque;
|
static void *balloon_opaque;
|
||||||
|
static bool balloon_inhibited;
|
||||||
|
|
||||||
|
bool qemu_balloon_is_inhibited(void)
|
||||||
|
{
|
||||||
|
return balloon_inhibited;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qemu_balloon_inhibit(bool state)
|
||||||
|
{
|
||||||
|
balloon_inhibited = state;
|
||||||
|
}
|
||||||
|
|
||||||
static bool have_balloon(Error **errp)
|
static bool have_balloon(Error **errp)
|
||||||
{
|
{
|
||||||
|
|
|
@ -291,3 +291,194 @@ save/send this state when we are in the middle of a pio operation
|
||||||
(that is what ide_drive_pio_state_needed() checks). If DRQ_STAT is
|
(that is what ide_drive_pio_state_needed() checks). If DRQ_STAT is
|
||||||
not enabled, the values of those fields are garbage and don't need to
|
not enabled, the values of those fields are garbage and don't need to
|
||||||
be sent.
|
be sent.
|
||||||
|
|
||||||
|
= Return path =
|
||||||
|
|
||||||
|
In most migration scenarios there is only a single data path that runs
|
||||||
|
from the source VM to the destination, typically along a single fd (although
|
||||||
|
possibly with another fd or similar for some fast way of throwing pages across).
|
||||||
|
|
||||||
|
However, some uses need two way communication; in particular the Postcopy
|
||||||
|
destination needs to be able to request pages on demand from the source.
|
||||||
|
|
||||||
|
For these scenarios there is a 'return path' from the destination to the source;
|
||||||
|
qemu_file_get_return_path(QEMUFile* fwdpath) gives the QEMUFile* for the return
|
||||||
|
path.
|
||||||
|
|
||||||
|
Source side
|
||||||
|
Forward path - written by migration thread
|
||||||
|
Return path - opened by main thread, read by return-path thread
|
||||||
|
|
||||||
|
Destination side
|
||||||
|
Forward path - read by main thread
|
||||||
|
Return path - opened by main thread, written by main thread AND postcopy
|
||||||
|
thread (protected by rp_mutex)
|
||||||
|
|
||||||
|
= Postcopy =
|
||||||
|
'Postcopy' migration is a way to deal with migrations that refuse to converge
|
||||||
|
(or take too long to converge); its plus side is that there is an upper bound on
|
||||||
|
the amount of migration traffic and time it takes; the down side is that during
|
||||||
|
the postcopy phase, a failure of *either* side or the network connection causes
|
||||||
|
the guest to be lost.
|
||||||
|
|
||||||
|
In postcopy the destination CPUs are started before all the memory has been
|
||||||
|
transferred, and accesses to pages that are yet to be transferred cause
|
||||||
|
a fault that's translated by QEMU into a request to the source QEMU.
|
||||||
|
|
||||||
|
Postcopy can be combined with precopy (i.e. normal migration) so that if precopy
|
||||||
|
doesn't finish in a given time the switch is made to postcopy.
|
||||||
|
|
||||||
|
=== Enabling postcopy ===
|
||||||
|
|
||||||
|
To enable postcopy, issue this command on the monitor prior to the
|
||||||
|
start of migration:
|
||||||
|
|
||||||
|
migrate_set_capability x-postcopy-ram on
|
||||||
|
|
||||||
|
The normal commands are then used to start a migration, which is still
|
||||||
|
started in precopy mode. Issuing:
|
||||||
|
|
||||||
|
migrate_start_postcopy
|
||||||
|
|
||||||
|
will now cause the transition from precopy to postcopy.
|
||||||
|
It can be issued immediately after migration is started or any
|
||||||
|
time later on. Issuing it after the end of a migration is harmless.
|
||||||
|
|
||||||
|
Note: During the postcopy phase, the bandwidth limits set using
|
||||||
|
migrate_set_speed are ignored (to avoid delaying requested pages that
|
||||||
|
the destination is waiting for).
|
||||||
|
|
||||||
|
=== Postcopy device transfer ===
|
||||||
|
|
||||||
|
Loading of device data may cause the device emulation to access guest RAM
|
||||||
|
that may trigger faults that have to be resolved by the source; as such
|
||||||
|
the migration stream has to be able to respond with page data *during* the
|
||||||
|
device load, and hence the device data has to be read from the stream completely
|
||||||
|
before the device load begins, to free the stream up. This is achieved by
|
||||||
|
'packaging' the device data into a blob that's read in one go.
|
||||||
|
|
||||||
|
Source behaviour
|
||||||
|
|
||||||
|
Until postcopy is entered the migration stream is identical to normal
|
||||||
|
precopy, except for the addition of a 'postcopy advise' command at
|
||||||
|
the beginning, to tell the destination that postcopy might happen.
|
||||||
|
When postcopy starts the source sends the page discard data and then
|
||||||
|
forms the 'package' containing:
|
||||||
|
|
||||||
|
Command: 'postcopy listen'
|
||||||
|
The device state
|
||||||
|
A series of sections, identical to the precopy stream's device state stream
|
||||||
|
containing everything except postcopiable devices (i.e. RAM)
|
||||||
|
Command: 'postcopy run'
|
||||||
|
|
||||||
|
The 'package' is sent as the data part of a Command: 'CMD_PACKAGED', and the
|
||||||
|
contents are formatted in the same way as the main migration stream.
|
||||||
|
|
||||||
|
During postcopy the source scans the list of dirty pages and sends them
|
||||||
|
to the destination without being requested (in much the same way as precopy),
|
||||||
|
however when a page request is received from the destination, the dirty page
|
||||||
|
scanning restarts from the requested location. This causes requested pages
|
||||||
|
to be sent quickly, and also causes pages directly after the requested page
|
||||||
|
to be sent quickly in the hope that those pages are likely to be used
|
||||||
|
by the destination soon.
|
||||||
|
|
||||||
|
Destination behaviour
|
||||||
|
|
||||||
|
Initially the destination looks the same as precopy, with a single thread
|
||||||
|
reading the migration stream; the 'postcopy advise' and 'discard' commands
|
||||||
|
are processed to change the way RAM is managed, but don't affect the stream
|
||||||
|
processing.
|
||||||
|
|
||||||
|
------------------------------------------------------------------------------
|
||||||
|
1 2 3 4 5 6 7
|
||||||
|
main -----DISCARD-CMD_PACKAGED ( LISTEN DEVICE DEVICE DEVICE RUN )
|
||||||
|
thread | |
|
||||||
|
| (page request)
|
||||||
|
| \___
|
||||||
|
v \
|
||||||
|
listen thread: --- page -- page -- page -- page -- page --
|
||||||
|
|
||||||
|
a b c
|
||||||
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
On receipt of CMD_PACKAGED (1)
|
||||||
|
All the data associated with the package - the ( ... ) section in the
|
||||||
|
diagram - is read into memory (into a QEMUSizedBuffer), and the main thread
|
||||||
|
recurses into qemu_loadvm_state_main to process the contents of the package (2)
|
||||||
|
which contains commands (3,6) and devices (4...)
|
||||||
|
|
||||||
|
On receipt of 'postcopy listen' - 3 - (i.e. the 1st command in the package)
|
||||||
|
a new thread (a) is started that takes over servicing the migration stream,
|
||||||
|
while the main thread carries on loading the package. It loads normal
|
||||||
|
background page data (b) but if during a device load a fault happens (5) the
|
||||||
|
returned page (c) is loaded by the listen thread allowing the main thread's
|
||||||
|
device load to carry on.
|
||||||
|
|
||||||
|
The last thing in the CMD_PACKAGED is a 'RUN' command (6) letting the destination
|
||||||
|
CPUs start running.
|
||||||
|
At the end of the CMD_PACKAGED (7) the main thread returns to normal running behaviour
|
||||||
|
and is no longer used by migration, while the listen thread carries
|
||||||
|
on servicing page data until the end of migration.
|
||||||
|
|
||||||
|
=== Postcopy states ===
|
||||||
|
|
||||||
|
Postcopy moves through a series of states (see postcopy_state) from
|
||||||
|
ADVISE->DISCARD->LISTEN->RUNNING->END
|
||||||
|
|
||||||
|
Advise: Set at the start of migration if postcopy is enabled, even
|
||||||
|
if it hasn't had the start command; here the destination
|
||||||
|
checks that its OS has the support needed for postcopy, and performs
|
||||||
|
setup to ensure the RAM mappings are suitable for later postcopy.
|
||||||
|
The destination will fail early in migration at this point if the
|
||||||
|
required OS support is not present.
|
||||||
|
(Triggered by reception of POSTCOPY_ADVISE command)
|
||||||
|
|
||||||
|
Discard: Entered on receipt of the first 'discard' command; prior to
|
||||||
|
the first Discard being performed, hugepages are switched off
|
||||||
|
(using madvise) to ensure that no new huge pages are created
|
||||||
|
during the postcopy phase, and to cause any huge pages that
|
||||||
|
have discards on them to be broken.
|
||||||
|
|
||||||
|
Listen: The first command in the package, POSTCOPY_LISTEN, switches
|
||||||
|
the destination state to Listen, and starts a new thread
|
||||||
|
(the 'listen thread') which takes over the job of receiving
|
||||||
|
pages off the migration stream, while the main thread carries
|
||||||
|
on processing the blob. With this thread able to process page
|
||||||
|
reception, the destination now 'sensitises' the RAM to detect
|
||||||
|
any access to missing pages (on Linux using the 'userfault'
|
||||||
|
system).
|
||||||
|
|
||||||
|
Running: POSTCOPY_RUN causes the destination to synchronise all
|
||||||
|
state and start the CPUs and IO devices running. The main
|
||||||
|
thread now finishes processing the migration package and
|
||||||
|
now carries on as it would for normal precopy migration
|
||||||
|
(although it can't do the cleanup it would do as it
|
||||||
|
finishes a normal migration).
|
||||||
|
|
||||||
|
End: The listen thread can now quit, and perform the cleanup of migration
|
||||||
|
state; the migration is now complete.
|
||||||
|
|
||||||
|
=== Source side page maps ===
|
||||||
|
|
||||||
|
The source side keeps two bitmaps during postcopy; 'the migration bitmap'
|
||||||
|
and 'unsent map'. The 'migration bitmap' is basically the same as in
|
||||||
|
the precopy case, and holds a bit to indicate that a page is 'dirty' -
|
||||||
|
i.e. needs sending. During the precopy phase this is updated as the CPU
|
||||||
|
dirties pages, however during postcopy the CPUs are stopped and nothing
|
||||||
|
should dirty anything any more.
|
||||||
|
|
||||||
|
The 'unsent map' is used for the transition to postcopy. It is a bitmap that
|
||||||
|
has a bit cleared whenever a page is sent to the destination, however during
|
||||||
|
the transition to postcopy mode it is combined with the migration bitmap
|
||||||
|
to form a set of pages that:
|
||||||
|
a) Have been sent but then redirtied (which must be discarded)
|
||||||
|
b) Have not yet been sent - which also must be discarded to cause any
|
||||||
|
transparent huge pages built during precopy to be broken.
|
||||||
|
|
||||||
|
Note that the contents of the unsentmap are sacrificed during the calculation
|
||||||
|
of the discard set and thus aren't valid once in postcopy. The dirtymap (the 'migration bitmap')
|
||||||
|
is still valid and is used to ensure that no page is sent more than once. Any
|
||||||
|
request for a page that has already been sent is ignored. Duplicate requests
|
||||||
|
such as this can happen as a page is sent at about the same time the
|
||||||
|
destination accesses it.
|
||||||
|
|
||||||
|
|
92
exec.c
92
exec.c
|
@ -1377,6 +1377,11 @@ static RAMBlock *find_ram_block(ram_addr_t addr)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *qemu_ram_get_idstr(RAMBlock *rb)
|
||||||
|
{
|
||||||
|
return rb->idstr;
|
||||||
|
}
|
||||||
|
|
||||||
/* Called with iothread lock held. */
|
/* Called with iothread lock held. */
|
||||||
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
|
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
|
||||||
{
|
{
|
||||||
|
@ -1447,7 +1452,7 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
|
||||||
|
|
||||||
assert(block);
|
assert(block);
|
||||||
|
|
||||||
newsize = TARGET_PAGE_ALIGN(newsize);
|
newsize = HOST_PAGE_ALIGN(newsize);
|
||||||
|
|
||||||
if (block->used_length == newsize) {
|
if (block->used_length == newsize) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1591,7 +1596,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
size = TARGET_PAGE_ALIGN(size);
|
size = HOST_PAGE_ALIGN(size);
|
||||||
new_block = g_malloc0(sizeof(*new_block));
|
new_block = g_malloc0(sizeof(*new_block));
|
||||||
new_block->mr = mr;
|
new_block->mr = mr;
|
||||||
new_block->used_length = size;
|
new_block->used_length = size;
|
||||||
|
@ -1627,8 +1632,8 @@ ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||||
ram_addr_t addr;
|
ram_addr_t addr;
|
||||||
Error *local_err = NULL;
|
Error *local_err = NULL;
|
||||||
|
|
||||||
size = TARGET_PAGE_ALIGN(size);
|
size = HOST_PAGE_ALIGN(size);
|
||||||
max_size = TARGET_PAGE_ALIGN(max_size);
|
max_size = HOST_PAGE_ALIGN(max_size);
|
||||||
new_block = g_malloc0(sizeof(*new_block));
|
new_block = g_malloc0(sizeof(*new_block));
|
||||||
new_block->mr = mr;
|
new_block->mr = mr;
|
||||||
new_block->resized = resized;
|
new_block->resized = resized;
|
||||||
|
@ -1877,8 +1882,16 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Some of the softmmu routines need to translate from a host pointer
|
/*
|
||||||
* (typically a TLB entry) back to a ram offset.
|
* Translates a host ptr back to a RAMBlock, a ram_addr and an offset
|
||||||
|
* in that RAMBlock.
|
||||||
|
*
|
||||||
|
* ptr: Host pointer to look up
|
||||||
|
* round_offset: If true round the result offset down to a page boundary
|
||||||
|
* *ram_addr: set to result ram_addr
|
||||||
|
* *offset: set to result offset within the RAMBlock
|
||||||
|
*
|
||||||
|
* Returns: RAMBlock (or NULL if not found)
|
||||||
*
|
*
|
||||||
* By the time this function returns, the returned pointer is not protected
|
* By the time this function returns, the returned pointer is not protected
|
||||||
* by RCU anymore. If the caller is not within an RCU critical section and
|
* by RCU anymore. If the caller is not within an RCU critical section and
|
||||||
|
@ -1886,18 +1899,22 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
|
||||||
* pointer, such as a reference to the region that includes the incoming
|
* pointer, such as a reference to the region that includes the incoming
|
||||||
* ram_addr_t.
|
* ram_addr_t.
|
||||||
*/
|
*/
|
||||||
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
|
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
|
||||||
|
ram_addr_t *ram_addr,
|
||||||
|
ram_addr_t *offset)
|
||||||
{
|
{
|
||||||
RAMBlock *block;
|
RAMBlock *block;
|
||||||
uint8_t *host = ptr;
|
uint8_t *host = ptr;
|
||||||
MemoryRegion *mr;
|
|
||||||
|
|
||||||
if (xen_enabled()) {
|
if (xen_enabled()) {
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
*ram_addr = xen_ram_addr_from_mapcache(ptr);
|
*ram_addr = xen_ram_addr_from_mapcache(ptr);
|
||||||
mr = qemu_get_ram_block(*ram_addr)->mr;
|
block = qemu_get_ram_block(*ram_addr);
|
||||||
|
if (block) {
|
||||||
|
*offset = (host - block->host);
|
||||||
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
return mr;
|
return block;
|
||||||
}
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
@ -1920,10 +1937,49 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
found:
|
found:
|
||||||
*ram_addr = block->offset + (host - block->host);
|
*offset = (host - block->host);
|
||||||
mr = block->mr;
|
if (round_offset) {
|
||||||
|
*offset &= TARGET_PAGE_MASK;
|
||||||
|
}
|
||||||
|
*ram_addr = block->offset + *offset;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
return mr;
|
return block;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finds the named RAMBlock
|
||||||
|
*
|
||||||
|
* name: The name of RAMBlock to find
|
||||||
|
*
|
||||||
|
* Returns: RAMBlock (or NULL if not found)
|
||||||
|
*/
|
||||||
|
RAMBlock *qemu_ram_block_by_name(const char *name)
|
||||||
|
{
|
||||||
|
RAMBlock *block;
|
||||||
|
|
||||||
|
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
|
||||||
|
if (!strcmp(name, block->idstr)) {
|
||||||
|
return block;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Some of the softmmu routines need to translate from a host pointer
|
||||||
|
(typically a TLB entry) back to a ram offset. */
|
||||||
|
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
|
||||||
|
{
|
||||||
|
RAMBlock *block;
|
||||||
|
ram_addr_t offset; /* Not used */
|
||||||
|
|
||||||
|
block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
|
||||||
|
|
||||||
|
if (!block) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return block->mr;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
|
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
|
||||||
|
@ -3502,6 +3558,16 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allows code that needs to deal with migration bitmaps etc to still be built
|
||||||
|
* target independent.
|
||||||
|
*/
|
||||||
|
size_t qemu_target_page_bits(void)
|
||||||
|
{
|
||||||
|
return TARGET_PAGE_BITS;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1005,6 +1005,21 @@ STEXI
|
||||||
@item migrate_set_parameter @var{parameter} @var{value}
|
@item migrate_set_parameter @var{parameter} @var{value}
|
||||||
@findex migrate_set_parameter
|
@findex migrate_set_parameter
|
||||||
Set the parameter @var{parameter} for migration.
|
Set the parameter @var{parameter} for migration.
|
||||||
|
ETEXI
|
||||||
|
|
||||||
|
{
|
||||||
|
.name = "migrate_start_postcopy",
|
||||||
|
.args_type = "",
|
||||||
|
.params = "",
|
||||||
|
.help = "Switch migration to postcopy mode",
|
||||||
|
.mhandler.cmd = hmp_migrate_start_postcopy,
|
||||||
|
},
|
||||||
|
|
||||||
|
STEXI
|
||||||
|
@item migrate_start_postcopy
|
||||||
|
@findex migrate_start_postcopy
|
||||||
|
Switch in-progress migration to postcopy mode. Ignored after the end of
|
||||||
|
migration (or once already in postcopy).
|
||||||
ETEXI
|
ETEXI
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
7
hmp.c
7
hmp.c
|
@ -1293,6 +1293,13 @@ void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
|
||||||
hmp_handle_error(mon, &err);
|
hmp_handle_error(mon, &err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
|
||||||
|
{
|
||||||
|
Error *err = NULL;
|
||||||
|
qmp_migrate_start_postcopy(&err);
|
||||||
|
hmp_handle_error(mon, &err);
|
||||||
|
}
|
||||||
|
|
||||||
void hmp_set_password(Monitor *mon, const QDict *qdict)
|
void hmp_set_password(Monitor *mon, const QDict *qdict)
|
||||||
{
|
{
|
||||||
const char *protocol = qdict_get_str(qdict, "protocol");
|
const char *protocol = qdict_get_str(qdict, "protocol");
|
||||||
|
|
1
hmp.h
1
hmp.h
|
@ -69,6 +69,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
|
||||||
void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
|
void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
|
||||||
void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
|
void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
|
||||||
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
|
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
|
||||||
|
void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
|
||||||
void hmp_set_password(Monitor *mon, const QDict *qdict);
|
void hmp_set_password(Monitor *mon, const QDict *qdict);
|
||||||
void hmp_expire_password(Monitor *mon, const QDict *qdict);
|
void hmp_expire_password(Monitor *mon, const QDict *qdict);
|
||||||
void hmp_eject(Monitor *mon, const QDict *qdict);
|
void hmp_eject(Monitor *mon, const QDict *qdict);
|
||||||
|
|
|
@ -1588,7 +1588,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
|
||||||
static SaveVMHandlers savevm_htab_handlers = {
|
static SaveVMHandlers savevm_htab_handlers = {
|
||||||
.save_live_setup = htab_save_setup,
|
.save_live_setup = htab_save_setup,
|
||||||
.save_live_iterate = htab_save_iterate,
|
.save_live_iterate = htab_save_iterate,
|
||||||
.save_live_complete = htab_save_complete,
|
.save_live_complete_precopy = htab_save_complete,
|
||||||
.load_state = htab_load,
|
.load_state = htab_load,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -37,9 +37,11 @@
|
||||||
static void balloon_page(void *addr, int deflate)
|
static void balloon_page(void *addr, int deflate)
|
||||||
{
|
{
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
if (!kvm_enabled() || kvm_has_sync_mmu())
|
if (!qemu_balloon_is_inhibited() && (!kvm_enabled() ||
|
||||||
|
kvm_has_sync_mmu())) {
|
||||||
qemu_madvise(addr, TARGET_PAGE_SIZE,
|
qemu_madvise(addr, TARGET_PAGE_SIZE,
|
||||||
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
|
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -64,8 +64,12 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
|
||||||
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
|
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
|
||||||
/* This should not be used by devices. */
|
/* This should not be used by devices. */
|
||||||
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
|
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
|
||||||
|
RAMBlock *qemu_ram_block_by_name(const char *name);
|
||||||
|
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
|
||||||
|
ram_addr_t *ram_addr, ram_addr_t *offset);
|
||||||
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
|
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
|
||||||
void qemu_ram_unset_idstr(ram_addr_t addr);
|
void qemu_ram_unset_idstr(ram_addr_t addr);
|
||||||
|
const char *qemu_ram_get_idstr(RAMBlock *rb);
|
||||||
|
|
||||||
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
|
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
|
||||||
int len, int is_write);
|
int len, int is_write);
|
||||||
|
|
|
@ -72,7 +72,6 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
|
||||||
|
|
||||||
void cpu_gen_init(void);
|
void cpu_gen_init(void);
|
||||||
bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
|
bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
|
||||||
void page_size_init(void);
|
|
||||||
|
|
||||||
void QEMU_NORETURN cpu_resume_from_signal(CPUState *cpu, void *puc);
|
void QEMU_NORETURN cpu_resume_from_signal(CPUState *cpu, void *puc);
|
||||||
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
|
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
|
||||||
|
|
|
@ -22,8 +22,6 @@
|
||||||
#ifndef CONFIG_USER_ONLY
|
#ifndef CONFIG_USER_ONLY
|
||||||
#include "hw/xen/xen.h"
|
#include "hw/xen/xen.h"
|
||||||
|
|
||||||
typedef struct RAMBlock RAMBlock;
|
|
||||||
|
|
||||||
struct RAMBlock {
|
struct RAMBlock {
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
struct MemoryRegion *mr;
|
struct MemoryRegion *mr;
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
#define QEMU_VM_SUBSECTION 0x05
|
#define QEMU_VM_SUBSECTION 0x05
|
||||||
#define QEMU_VM_VMDESCRIPTION 0x06
|
#define QEMU_VM_VMDESCRIPTION 0x06
|
||||||
#define QEMU_VM_CONFIGURATION 0x07
|
#define QEMU_VM_CONFIGURATION 0x07
|
||||||
|
#define QEMU_VM_COMMAND 0x08
|
||||||
#define QEMU_VM_SECTION_FOOTER 0x7e
|
#define QEMU_VM_SECTION_FOOTER 0x7e
|
||||||
|
|
||||||
struct MigrationParams {
|
struct MigrationParams {
|
||||||
|
@ -42,13 +43,67 @@ struct MigrationParams {
|
||||||
bool shared;
|
bool shared;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct MigrationState MigrationState;
|
/* Messages sent on the return path from destination to source */
|
||||||
|
enum mig_rp_message_type {
|
||||||
|
MIG_RP_MSG_INVALID = 0, /* Must be 0 */
|
||||||
|
MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */
|
||||||
|
MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */
|
||||||
|
|
||||||
|
MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
|
||||||
|
MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */
|
||||||
|
|
||||||
|
MIG_RP_MSG_MAX
|
||||||
|
};
|
||||||
|
|
||||||
typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
|
typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
|
||||||
|
|
||||||
|
/* The current postcopy state is read/set by postcopy_state_get/set
|
||||||
|
* which update it atomically.
|
||||||
|
* The state is updated as postcopy messages are received, and
|
||||||
|
* in general only one thread should be writing to the state at any one
|
||||||
|
* time, initially the main thread and then the listen thread;
|
||||||
|
* Corner cases are where either thread finishes early and/or errors.
|
||||||
|
* The state is checked as messages are received to ensure that
|
||||||
|
* the source is sending us messages in the correct order.
|
||||||
|
* The state is also used by the RAM reception code to know if it
|
||||||
|
* has to place pages atomically, and the cleanup code at the end of
|
||||||
|
* the main thread to know if it has to delay cleanup until the end
|
||||||
|
* of postcopy.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
POSTCOPY_INCOMING_NONE = 0, /* Initial state - no postcopy */
|
||||||
|
POSTCOPY_INCOMING_ADVISE,
|
||||||
|
POSTCOPY_INCOMING_DISCARD,
|
||||||
|
POSTCOPY_INCOMING_LISTENING,
|
||||||
|
POSTCOPY_INCOMING_RUNNING,
|
||||||
|
POSTCOPY_INCOMING_END
|
||||||
|
} PostcopyState;
|
||||||
|
|
||||||
/* State for the incoming migration */
|
/* State for the incoming migration */
|
||||||
struct MigrationIncomingState {
|
struct MigrationIncomingState {
|
||||||
QEMUFile *file;
|
QEMUFile *from_src_file;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free at the start of the main state load, set as the main thread finishes
|
||||||
|
* loading state.
|
||||||
|
*/
|
||||||
|
QemuEvent main_thread_load_event;
|
||||||
|
|
||||||
|
bool have_fault_thread;
|
||||||
|
QemuThread fault_thread;
|
||||||
|
QemuSemaphore fault_thread_sem;
|
||||||
|
|
||||||
|
bool have_listen_thread;
|
||||||
|
QemuThread listen_thread;
|
||||||
|
QemuSemaphore listen_thread_sem;
|
||||||
|
|
||||||
|
/* For the kernel to send us notifications */
|
||||||
|
int userfault_fd;
|
||||||
|
/* To tell the fault_thread to quit */
|
||||||
|
int userfault_quit_fd;
|
||||||
|
QEMUFile *to_src_file;
|
||||||
|
QemuMutex rp_mutex; /* We send replies from multiple threads */
|
||||||
|
void *postcopy_tmp_page;
|
||||||
|
|
||||||
/* See savevm.c */
|
/* See savevm.c */
|
||||||
LoadStateEntry_Head loadvm_handlers;
|
LoadStateEntry_Head loadvm_handlers;
|
||||||
|
@ -58,6 +113,18 @@ MigrationIncomingState *migration_incoming_get_current(void);
|
||||||
MigrationIncomingState *migration_incoming_state_new(QEMUFile *f);
|
MigrationIncomingState *migration_incoming_state_new(QEMUFile *f);
|
||||||
void migration_incoming_state_destroy(void);
|
void migration_incoming_state_destroy(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* An outstanding page request, on the source, having been received
|
||||||
|
* and queued
|
||||||
|
*/
|
||||||
|
struct MigrationSrcPageRequest {
|
||||||
|
RAMBlock *rb;
|
||||||
|
hwaddr offset;
|
||||||
|
hwaddr len;
|
||||||
|
|
||||||
|
QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
|
||||||
|
};
|
||||||
|
|
||||||
struct MigrationState
|
struct MigrationState
|
||||||
{
|
{
|
||||||
int64_t bandwidth_limit;
|
int64_t bandwidth_limit;
|
||||||
|
@ -70,6 +137,14 @@ struct MigrationState
|
||||||
|
|
||||||
int state;
|
int state;
|
||||||
MigrationParams params;
|
MigrationParams params;
|
||||||
|
|
||||||
|
/* State related to return path */
|
||||||
|
struct {
|
||||||
|
QEMUFile *from_dst_file;
|
||||||
|
QemuThread rp_thread;
|
||||||
|
bool error;
|
||||||
|
} rp_state;
|
||||||
|
|
||||||
double mbps;
|
double mbps;
|
||||||
int64_t total_time;
|
int64_t total_time;
|
||||||
int64_t downtime;
|
int64_t downtime;
|
||||||
|
@ -80,6 +155,18 @@ struct MigrationState
|
||||||
int64_t xbzrle_cache_size;
|
int64_t xbzrle_cache_size;
|
||||||
int64_t setup_time;
|
int64_t setup_time;
|
||||||
int64_t dirty_sync_count;
|
int64_t dirty_sync_count;
|
||||||
|
|
||||||
|
/* Flag set once the migration has been asked to enter postcopy */
|
||||||
|
bool start_postcopy;
|
||||||
|
|
||||||
|
/* Flag set once the migration thread is running (and needs joining) */
|
||||||
|
bool migration_thread_running;
|
||||||
|
|
||||||
|
/* Queue of outstanding page requests from the destination */
|
||||||
|
QemuMutex src_page_req_mutex;
|
||||||
|
QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
|
||||||
|
/* The RAMBlock used in the last src_page_request */
|
||||||
|
RAMBlock *last_req_rb;
|
||||||
};
|
};
|
||||||
|
|
||||||
void process_incoming_migration(QEMUFile *f);
|
void process_incoming_migration(QEMUFile *f);
|
||||||
|
@ -116,9 +203,12 @@ int migrate_fd_close(MigrationState *s);
|
||||||
|
|
||||||
void add_migration_state_change_notifier(Notifier *notify);
|
void add_migration_state_change_notifier(Notifier *notify);
|
||||||
void remove_migration_state_change_notifier(Notifier *notify);
|
void remove_migration_state_change_notifier(Notifier *notify);
|
||||||
|
MigrationState *migrate_init(const MigrationParams *params);
|
||||||
bool migration_in_setup(MigrationState *);
|
bool migration_in_setup(MigrationState *);
|
||||||
bool migration_has_finished(MigrationState *);
|
bool migration_has_finished(MigrationState *);
|
||||||
bool migration_has_failed(MigrationState *);
|
bool migration_has_failed(MigrationState *);
|
||||||
|
/* True if outgoing migration has entered postcopy phase */
|
||||||
|
bool migration_in_postcopy(MigrationState *);
|
||||||
MigrationState *migrate_get_current(void);
|
MigrationState *migrate_get_current(void);
|
||||||
|
|
||||||
void migrate_compress_threads_create(void);
|
void migrate_compress_threads_create(void);
|
||||||
|
@ -145,6 +235,13 @@ uint64_t xbzrle_mig_pages_cache_miss(void);
|
||||||
double xbzrle_mig_cache_miss_rate(void);
|
double xbzrle_mig_cache_miss_rate(void);
|
||||||
|
|
||||||
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
|
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
|
||||||
|
void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
|
||||||
|
/* For outgoing discard bitmap */
|
||||||
|
int ram_postcopy_send_discard_bitmap(MigrationState *ms);
|
||||||
|
/* For incoming postcopy discard */
|
||||||
|
int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
|
||||||
|
uint64_t start, size_t length);
|
||||||
|
int ram_postcopy_incoming_init(MigrationIncomingState *mis);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @migrate_add_blocker - prevent migration from proceeding
|
* @migrate_add_blocker - prevent migration from proceeding
|
||||||
|
@ -160,6 +257,7 @@ void migrate_add_blocker(Error *reason);
|
||||||
*/
|
*/
|
||||||
void migrate_del_blocker(Error *reason);
|
void migrate_del_blocker(Error *reason);
|
||||||
|
|
||||||
|
bool migrate_postcopy_ram(void);
|
||||||
bool migrate_zero_blocks(void);
|
bool migrate_zero_blocks(void);
|
||||||
|
|
||||||
bool migrate_auto_converge(void);
|
bool migrate_auto_converge(void);
|
||||||
|
@ -179,6 +277,17 @@ int migrate_compress_threads(void);
|
||||||
int migrate_decompress_threads(void);
|
int migrate_decompress_threads(void);
|
||||||
bool migrate_use_events(void);
|
bool migrate_use_events(void);
|
||||||
|
|
||||||
|
/* Sending on the return path - generic and then for each message type */
|
||||||
|
void migrate_send_rp_message(MigrationIncomingState *mis,
|
||||||
|
enum mig_rp_message_type message_type,
|
||||||
|
uint16_t len, void *data);
|
||||||
|
void migrate_send_rp_shut(MigrationIncomingState *mis,
|
||||||
|
uint32_t value);
|
||||||
|
void migrate_send_rp_pong(MigrationIncomingState *mis,
|
||||||
|
uint32_t value);
|
||||||
|
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
|
||||||
|
ram_addr_t start, size_t len);
|
||||||
|
|
||||||
void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
|
void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
|
||||||
void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
|
void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
|
||||||
void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
|
void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
|
||||||
|
@ -204,4 +313,12 @@ void global_state_set_optional(void);
|
||||||
void savevm_skip_configuration(void);
|
void savevm_skip_configuration(void);
|
||||||
int global_state_store(void);
|
int global_state_store(void);
|
||||||
void global_state_store_running(void);
|
void global_state_store_running(void);
|
||||||
|
|
||||||
|
void flush_page_queue(MigrationState *ms);
|
||||||
|
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
|
||||||
|
ram_addr_t start, ram_addr_t len);
|
||||||
|
|
||||||
|
PostcopyState postcopy_state_get(void);
|
||||||
|
/* Set the state and return the old state */
|
||||||
|
PostcopyState postcopy_state_set(PostcopyState new_state);
|
||||||
#endif
|
#endif
|
||||||
|
|
99
include/migration/postcopy-ram.h
Normal file
99
include/migration/postcopy-ram.h
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
/*
|
||||||
|
* Postcopy migration for RAM
|
||||||
|
*
|
||||||
|
* Copyright 2013 Red Hat, Inc. and/or its affiliates
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Dave Gilbert <dgilbert@redhat.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||||
|
* See the COPYING file in the top-level directory.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#ifndef QEMU_POSTCOPY_RAM_H
|
||||||
|
#define QEMU_POSTCOPY_RAM_H
|
||||||
|
|
||||||
|
/* Return true if the host supports everything we need to do postcopy-ram */
|
||||||
|
bool postcopy_ram_supported_by_host(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make all of RAM sensitive to accesses to areas that haven't yet been written
|
||||||
|
* and wire up anything necessary to deal with it.
|
||||||
|
*/
|
||||||
|
int postcopy_ram_enable_notify(MigrationIncomingState *mis);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialise postcopy-ram, setting the RAM to a state where we can go into
|
||||||
|
* postcopy later; must be called prior to any precopy.
|
||||||
|
* called from ram.c's similarly named ram_postcopy_incoming_init
|
||||||
|
*/
|
||||||
|
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At the end of a migration where postcopy_ram_incoming_init was called.
|
||||||
|
*/
|
||||||
|
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Discard the contents of 'length' bytes from 'start'
|
||||||
|
* We can assume that if we've been called postcopy_ram_hosttest returned true
|
||||||
|
*/
|
||||||
|
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
|
||||||
|
size_t length);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
|
||||||
|
* however leaving it until after precopy means that most of the precopy
|
||||||
|
* data is still THPd
|
||||||
|
*/
|
||||||
|
int postcopy_ram_prepare_discard(MigrationIncomingState *mis);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called at the start of each RAMBlock by the bitmap code.
|
||||||
|
* 'offset' is the bitmap offset of the named RAMBlock in the migration
|
||||||
|
* bitmap.
|
||||||
|
* Returns a new PDS
|
||||||
|
*/
|
||||||
|
PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
|
||||||
|
unsigned long offset,
|
||||||
|
const char *name);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called by the bitmap code for each chunk to discard.
|
||||||
|
* May send a discard message, may just leave it queued to
|
||||||
|
* be sent later.
|
||||||
|
* @start,@length: a range of pages in the migration bitmap in the
|
||||||
|
* RAM block passed to postcopy_discard_send_init() (length=1 is one page)
|
||||||
|
*/
|
||||||
|
void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
|
||||||
|
unsigned long start, unsigned long length);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called at the end of each RAMBlock by the bitmap code.
|
||||||
|
* Sends any outstanding discard messages, frees the PDS.
|
||||||
|
*/
|
||||||
|
void postcopy_discard_send_finish(MigrationState *ms,
|
||||||
|
PostcopyDiscardState *pds);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Place a page (from) at (host) efficiently
|
||||||
|
* There are restrictions on how 'from' must be mapped, in general best
|
||||||
|
* to use other postcopy_ routines to allocate.
|
||||||
|
* returns 0 on success
|
||||||
|
*/
|
||||||
|
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Place a zero page at (host) atomically
|
||||||
|
* returns 0 on success
|
||||||
|
*/
|
||||||
|
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a page of memory that can be mapped at a later point in time
|
||||||
|
* using postcopy_place_page
|
||||||
|
* Returns: Pointer to allocated page
|
||||||
|
*/
|
||||||
|
void *postcopy_get_tmp_page(MigrationIncomingState *mis);
|
||||||
|
|
||||||
|
#endif
|
|
@ -88,6 +88,11 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint64_t *bytes_sent);
|
uint64_t *bytes_sent);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return a QEMUFile for comms in the opposite direction
|
||||||
|
*/
|
||||||
|
typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stop any read or write (depending on flags) on the underlying
|
* Stop any read or write (depending on flags) on the underlying
|
||||||
* transport on the QEMUFile.
|
* transport on the QEMUFile.
|
||||||
|
@ -106,6 +111,7 @@ typedef struct QEMUFileOps {
|
||||||
QEMURamHookFunc *after_ram_iterate;
|
QEMURamHookFunc *after_ram_iterate;
|
||||||
QEMURamHookFunc *hook_ram_load;
|
QEMURamHookFunc *hook_ram_load;
|
||||||
QEMURamSaveFunc *save_page;
|
QEMURamSaveFunc *save_page;
|
||||||
|
QEMURetPathFunc *get_return_path;
|
||||||
QEMUFileShutdownFunc *shut_down;
|
QEMUFileShutdownFunc *shut_down;
|
||||||
} QEMUFileOps;
|
} QEMUFileOps;
|
||||||
|
|
||||||
|
@ -163,9 +169,11 @@ void qemu_put_be32(QEMUFile *f, unsigned int v);
|
||||||
void qemu_put_be64(QEMUFile *f, uint64_t v);
|
void qemu_put_be64(QEMUFile *f, uint64_t v);
|
||||||
size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset);
|
size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset);
|
||||||
size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size);
|
size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size);
|
||||||
|
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size);
|
||||||
ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
|
ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
|
||||||
int level);
|
int level);
|
||||||
int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src);
|
int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that you can only peek continuous bytes from where the current pointer
|
* Note that you can only peek continuous bytes from where the current pointer
|
||||||
* is; you aren't guaranteed to be able to peak to +n bytes unless you've
|
* is; you aren't guaranteed to be able to peak to +n bytes unless you've
|
||||||
|
@ -194,7 +202,9 @@ int64_t qemu_file_get_rate_limit(QEMUFile *f);
|
||||||
int qemu_file_get_error(QEMUFile *f);
|
int qemu_file_get_error(QEMUFile *f);
|
||||||
void qemu_file_set_error(QEMUFile *f, int ret);
|
void qemu_file_set_error(QEMUFile *f, int ret);
|
||||||
int qemu_file_shutdown(QEMUFile *f);
|
int qemu_file_shutdown(QEMUFile *f);
|
||||||
|
QEMUFile *qemu_file_get_return_path(QEMUFile *f);
|
||||||
void qemu_fflush(QEMUFile *f);
|
void qemu_fflush(QEMUFile *f);
|
||||||
|
void qemu_file_set_blocking(QEMUFile *f, bool block);
|
||||||
|
|
||||||
static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
|
static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
|
||||||
{
|
{
|
||||||
|
|
|
@ -40,7 +40,8 @@ typedef struct SaveVMHandlers {
|
||||||
SaveStateHandler *save_state;
|
SaveStateHandler *save_state;
|
||||||
|
|
||||||
void (*cleanup)(void *opaque);
|
void (*cleanup)(void *opaque);
|
||||||
int (*save_live_complete)(QEMUFile *f, void *opaque);
|
int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
|
||||||
|
int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
|
||||||
|
|
||||||
/* This runs both outside and inside the iothread lock. */
|
/* This runs both outside and inside the iothread lock. */
|
||||||
bool (*is_active)(void *opaque);
|
bool (*is_active)(void *opaque);
|
||||||
|
@ -54,8 +55,9 @@ typedef struct SaveVMHandlers {
|
||||||
|
|
||||||
/* This runs outside the iothread lock! */
|
/* This runs outside the iothread lock! */
|
||||||
int (*save_live_setup)(QEMUFile *f, void *opaque);
|
int (*save_live_setup)(QEMUFile *f, void *opaque);
|
||||||
uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
|
void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size,
|
||||||
|
uint64_t *non_postcopiable_pending,
|
||||||
|
uint64_t *postcopiable_pending);
|
||||||
LoadStateHandler *load_state;
|
LoadStateHandler *load_state;
|
||||||
} SaveVMHandlers;
|
} SaveVMHandlers;
|
||||||
|
|
||||||
|
|
|
@ -499,5 +499,6 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len);
|
||||||
int parse_debug_env(const char *name, int max, int initial);
|
int parse_debug_env(const char *name, int max, int initial);
|
||||||
|
|
||||||
const char *qemu_ether_ntoa(const MACAddr *mac);
|
const char *qemu_ether_ntoa(const MACAddr *mac);
|
||||||
|
void page_size_init(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -139,6 +139,8 @@ void qemu_anon_ram_free(void *ptr, size_t size);
|
||||||
|
|
||||||
#if defined(CONFIG_MADVISE)
|
#if defined(CONFIG_MADVISE)
|
||||||
|
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
#define QEMU_MADV_WILLNEED MADV_WILLNEED
|
#define QEMU_MADV_WILLNEED MADV_WILLNEED
|
||||||
#define QEMU_MADV_DONTNEED MADV_DONTNEED
|
#define QEMU_MADV_DONTNEED MADV_DONTNEED
|
||||||
#ifdef MADV_DONTFORK
|
#ifdef MADV_DONTFORK
|
||||||
|
@ -171,6 +173,11 @@ void qemu_anon_ram_free(void *ptr, size_t size);
|
||||||
#else
|
#else
|
||||||
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
|
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef MADV_NOHUGEPAGE
|
||||||
|
#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
|
||||||
|
#else
|
||||||
|
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
|
||||||
|
#endif
|
||||||
|
|
||||||
#elif defined(CONFIG_POSIX_MADVISE)
|
#elif defined(CONFIG_POSIX_MADVISE)
|
||||||
|
|
||||||
|
@ -182,6 +189,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
|
||||||
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
|
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
|
||||||
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
|
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
|
||||||
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
|
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
|
||||||
|
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
|
||||||
|
|
||||||
#else /* no-op */
|
#else /* no-op */
|
||||||
|
|
||||||
|
@ -193,6 +201,7 @@ void qemu_anon_ram_free(void *ptr, size_t size);
|
||||||
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
|
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
|
||||||
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
|
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
|
||||||
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
|
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
|
||||||
|
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ typedef struct MemoryRegion MemoryRegion;
|
||||||
typedef struct MemoryRegionSection MemoryRegionSection;
|
typedef struct MemoryRegionSection MemoryRegionSection;
|
||||||
typedef struct MigrationIncomingState MigrationIncomingState;
|
typedef struct MigrationIncomingState MigrationIncomingState;
|
||||||
typedef struct MigrationParams MigrationParams;
|
typedef struct MigrationParams MigrationParams;
|
||||||
|
typedef struct MigrationState MigrationState;
|
||||||
typedef struct Monitor Monitor;
|
typedef struct Monitor Monitor;
|
||||||
typedef struct MouseTransformInfo MouseTransformInfo;
|
typedef struct MouseTransformInfo MouseTransformInfo;
|
||||||
typedef struct MSIMessage MSIMessage;
|
typedef struct MSIMessage MSIMessage;
|
||||||
|
@ -66,6 +67,7 @@ typedef struct PCMachineState PCMachineState;
|
||||||
typedef struct PCMachineClass PCMachineClass;
|
typedef struct PCMachineClass PCMachineClass;
|
||||||
typedef struct PCMCIACardState PCMCIACardState;
|
typedef struct PCMCIACardState PCMCIACardState;
|
||||||
typedef struct PixelFormat PixelFormat;
|
typedef struct PixelFormat PixelFormat;
|
||||||
|
typedef struct PostcopyDiscardState PostcopyDiscardState;
|
||||||
typedef struct PropertyInfo PropertyInfo;
|
typedef struct PropertyInfo PropertyInfo;
|
||||||
typedef struct Property Property;
|
typedef struct Property Property;
|
||||||
typedef struct QEMUBH QEMUBH;
|
typedef struct QEMUBH QEMUBH;
|
||||||
|
@ -79,6 +81,7 @@ typedef struct QEMUSizedBuffer QEMUSizedBuffer;
|
||||||
typedef struct QEMUTimerListGroup QEMUTimerListGroup;
|
typedef struct QEMUTimerListGroup QEMUTimerListGroup;
|
||||||
typedef struct QEMUTimer QEMUTimer;
|
typedef struct QEMUTimer QEMUTimer;
|
||||||
typedef struct Range Range;
|
typedef struct Range Range;
|
||||||
|
typedef struct RAMBlock RAMBlock;
|
||||||
typedef struct SerialState SerialState;
|
typedef struct SerialState SerialState;
|
||||||
typedef struct SHPCDevice SHPCDevice;
|
typedef struct SHPCDevice SHPCDevice;
|
||||||
typedef struct SMBusDevice SMBusDevice;
|
typedef struct SMBusDevice SMBusDevice;
|
||||||
|
|
|
@ -22,5 +22,7 @@ typedef void (QEMUBalloonStatus)(void *opaque, BalloonInfo *info);
|
||||||
int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
|
int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
|
||||||
QEMUBalloonStatus *stat_func, void *opaque);
|
QEMUBalloonStatus *stat_func, void *opaque);
|
||||||
void qemu_remove_balloon_handler(void *opaque);
|
void qemu_remove_balloon_handler(void *opaque);
|
||||||
|
bool qemu_balloon_is_inhibited(void);
|
||||||
|
void qemu_balloon_inhibit(bool state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -70,6 +70,7 @@ void qemu_system_killed(int signal, pid_t pid);
|
||||||
void qemu_devices_reset(void);
|
void qemu_devices_reset(void);
|
||||||
void qemu_system_reset(bool report);
|
void qemu_system_reset(bool report);
|
||||||
void qemu_system_guest_panicked(void);
|
void qemu_system_guest_panicked(void);
|
||||||
|
size_t qemu_target_page_bits(void);
|
||||||
|
|
||||||
void qemu_add_exit_notifier(Notifier *notify);
|
void qemu_add_exit_notifier(Notifier *notify);
|
||||||
void qemu_remove_exit_notifier(Notifier *notify);
|
void qemu_remove_exit_notifier(Notifier *notify);
|
||||||
|
@ -83,14 +84,52 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
|
||||||
|
|
||||||
void qemu_announce_self(void);
|
void qemu_announce_self(void);
|
||||||
|
|
||||||
|
/* Subcommands for QEMU_VM_COMMAND */
|
||||||
|
enum qemu_vm_cmd {
|
||||||
|
MIG_CMD_INVALID = 0, /* Must be 0 */
|
||||||
|
MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */
|
||||||
|
MIG_CMD_PING, /* Request a PONG on the RP */
|
||||||
|
|
||||||
|
MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just
|
||||||
|
warn we might want to do PC */
|
||||||
|
MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming
|
||||||
|
pages as it's running. */
|
||||||
|
MIG_CMD_POSTCOPY_RUN, /* Start execution */
|
||||||
|
|
||||||
|
MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
|
||||||
|
were previously sent during
|
||||||
|
precopy but are dirty. */
|
||||||
|
MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
|
||||||
|
MIG_CMD_MAX
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24)
|
||||||
|
|
||||||
bool qemu_savevm_state_blocked(Error **errp);
|
bool qemu_savevm_state_blocked(Error **errp);
|
||||||
void qemu_savevm_state_begin(QEMUFile *f,
|
void qemu_savevm_state_begin(QEMUFile *f,
|
||||||
const MigrationParams *params);
|
const MigrationParams *params);
|
||||||
void qemu_savevm_state_header(QEMUFile *f);
|
void qemu_savevm_state_header(QEMUFile *f);
|
||||||
int qemu_savevm_state_iterate(QEMUFile *f);
|
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
|
||||||
void qemu_savevm_state_complete(QEMUFile *f);
|
|
||||||
void qemu_savevm_state_cleanup(void);
|
void qemu_savevm_state_cleanup(void);
|
||||||
uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
|
void qemu_savevm_state_complete_postcopy(QEMUFile *f);
|
||||||
|
void qemu_savevm_state_complete_precopy(QEMUFile *f);
|
||||||
|
void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
|
||||||
|
uint64_t *res_non_postcopiable,
|
||||||
|
uint64_t *res_postcopiable);
|
||||||
|
void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command,
|
||||||
|
uint16_t len, uint8_t *data);
|
||||||
|
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
|
||||||
|
void qemu_savevm_send_open_return_path(QEMUFile *f);
|
||||||
|
int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb);
|
||||||
|
void qemu_savevm_send_postcopy_advise(QEMUFile *f);
|
||||||
|
void qemu_savevm_send_postcopy_listen(QEMUFile *f);
|
||||||
|
void qemu_savevm_send_postcopy_run(QEMUFile *f);
|
||||||
|
|
||||||
|
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
|
||||||
|
uint16_t len,
|
||||||
|
uint64_t *start_list,
|
||||||
|
uint64_t *length_list);
|
||||||
|
|
||||||
int qemu_loadvm_state(QEMUFile *f);
|
int qemu_loadvm_state(QEMUFile *f);
|
||||||
|
|
||||||
typedef enum DisplayType
|
typedef enum DisplayType
|
||||||
|
@ -133,6 +172,7 @@ extern int boot_menu;
|
||||||
extern bool boot_strict;
|
extern bool boot_strict;
|
||||||
extern uint8_t *boot_splash_filedata;
|
extern uint8_t *boot_splash_filedata;
|
||||||
extern size_t boot_splash_filedata_size;
|
extern size_t boot_splash_filedata_size;
|
||||||
|
extern bool enable_mlock;
|
||||||
extern uint8_t qemu_extra_params_fw[2];
|
extern uint8_t qemu_extra_params_fw[2];
|
||||||
extern QEMUClockType rtc_clock;
|
extern QEMUClockType rtc_clock;
|
||||||
extern const char *mem_path;
|
extern const char *mem_path;
|
||||||
|
|
|
@ -1461,7 +1461,6 @@ static int kvm_init(MachineState *ms)
|
||||||
* page size for the system though.
|
* page size for the system though.
|
||||||
*/
|
*/
|
||||||
assert(TARGET_PAGE_SIZE <= getpagesize());
|
assert(TARGET_PAGE_SIZE <= getpagesize());
|
||||||
page_size_init();
|
|
||||||
|
|
||||||
s->sigmask_len = 8;
|
s->sigmask_len = 8;
|
||||||
|
|
||||||
|
|
167
linux-headers/linux/userfaultfd.h
Normal file
167
linux-headers/linux/userfaultfd.h
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
/*
|
||||||
|
* include/linux/userfaultfd.h
|
||||||
|
*
|
||||||
|
* Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
|
||||||
|
* Copyright (C) 2015 Red Hat, Inc.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _LINUX_USERFAULTFD_H
|
||||||
|
#define _LINUX_USERFAULTFD_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#define UFFD_API ((__u64)0xAA)
|
||||||
|
/*
|
||||||
|
* After implementing the respective features it will become:
|
||||||
|
* #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
|
||||||
|
* UFFD_FEATURE_EVENT_FORK)
|
||||||
|
*/
|
||||||
|
#define UFFD_API_FEATURES (0)
|
||||||
|
#define UFFD_API_IOCTLS \
|
||||||
|
((__u64)1 << _UFFDIO_REGISTER | \
|
||||||
|
(__u64)1 << _UFFDIO_UNREGISTER | \
|
||||||
|
(__u64)1 << _UFFDIO_API)
|
||||||
|
#define UFFD_API_RANGE_IOCTLS \
|
||||||
|
((__u64)1 << _UFFDIO_WAKE | \
|
||||||
|
(__u64)1 << _UFFDIO_COPY | \
|
||||||
|
(__u64)1 << _UFFDIO_ZEROPAGE)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Valid ioctl command number range with this API is from 0x00 to
|
||||||
|
* 0x3F. UFFDIO_API is the fixed number, everything else can be
|
||||||
|
* changed by implementing a different UFFD_API. If sticking to the
|
||||||
|
* same UFFD_API more ioctl can be added and userland will be aware of
|
||||||
|
* which ioctl the running kernel implements through the ioctl command
|
||||||
|
* bitmask written by the UFFDIO_API.
|
||||||
|
*/
|
||||||
|
#define _UFFDIO_REGISTER (0x00)
|
||||||
|
#define _UFFDIO_UNREGISTER (0x01)
|
||||||
|
#define _UFFDIO_WAKE (0x02)
|
||||||
|
#define _UFFDIO_COPY (0x03)
|
||||||
|
#define _UFFDIO_ZEROPAGE (0x04)
|
||||||
|
#define _UFFDIO_API (0x3F)
|
||||||
|
|
||||||
|
/* userfaultfd ioctl ids */
|
||||||
|
#define UFFDIO 0xAA
|
||||||
|
#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
|
||||||
|
struct uffdio_api)
|
||||||
|
#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
|
||||||
|
struct uffdio_register)
|
||||||
|
#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
|
||||||
|
struct uffdio_range)
|
||||||
|
#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
|
||||||
|
struct uffdio_range)
|
||||||
|
#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
|
||||||
|
struct uffdio_copy)
|
||||||
|
#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
|
||||||
|
struct uffdio_zeropage)
|
||||||
|
|
||||||
|
/* read() structure */
|
||||||
|
struct uffd_msg {
|
||||||
|
__u8 event;
|
||||||
|
|
||||||
|
__u8 reserved1;
|
||||||
|
__u16 reserved2;
|
||||||
|
__u32 reserved3;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
__u64 flags;
|
||||||
|
__u64 address;
|
||||||
|
} pagefault;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
/* unused reserved fields */
|
||||||
|
__u64 reserved1;
|
||||||
|
__u64 reserved2;
|
||||||
|
__u64 reserved3;
|
||||||
|
} reserved;
|
||||||
|
} arg;
|
||||||
|
} __packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start at 0x12 and not at 0 to be more strict against bugs.
|
||||||
|
*/
|
||||||
|
#define UFFD_EVENT_PAGEFAULT 0x12
|
||||||
|
#if 0 /* not available yet */
|
||||||
|
#define UFFD_EVENT_FORK 0x13
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* flags for UFFD_EVENT_PAGEFAULT */
|
||||||
|
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
|
||||||
|
#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
|
||||||
|
|
||||||
|
struct uffdio_api {
|
||||||
|
/* userland asks for an API number and the features to enable */
|
||||||
|
__u64 api;
|
||||||
|
/*
|
||||||
|
* Kernel answers below with the all available features for
|
||||||
|
* the API, this notifies userland of which events and/or
|
||||||
|
* which flags for each event are enabled in the current
|
||||||
|
* kernel.
|
||||||
|
*
|
||||||
|
* Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
|
||||||
|
* are to be considered implicitly always enabled in all kernels as
|
||||||
|
* long as the uffdio_api.api requested matches UFFD_API.
|
||||||
|
*/
|
||||||
|
#if 0 /* not available yet */
|
||||||
|
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
|
||||||
|
#define UFFD_FEATURE_EVENT_FORK (1<<1)
|
||||||
|
#endif
|
||||||
|
__u64 features;
|
||||||
|
|
||||||
|
__u64 ioctls;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct uffdio_range {
|
||||||
|
__u64 start;
|
||||||
|
__u64 len;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct uffdio_register {
|
||||||
|
struct uffdio_range range;
|
||||||
|
#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
|
||||||
|
#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
|
||||||
|
__u64 mode;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* kernel answers which ioctl commands are available for the
|
||||||
|
* range, keep at the end as the last 8 bytes aren't read.
|
||||||
|
*/
|
||||||
|
__u64 ioctls;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct uffdio_copy {
|
||||||
|
__u64 dst;
|
||||||
|
__u64 src;
|
||||||
|
__u64 len;
|
||||||
|
/*
|
||||||
|
* There will be a wrprotection flag later that allows to map
|
||||||
|
* pages wrprotected on the fly. And such a flag will be
|
||||||
|
* available if the wrprotection ioctl are implemented for the
|
||||||
|
* range according to the uffdio_register.ioctls.
|
||||||
|
*/
|
||||||
|
#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
|
||||||
|
__u64 mode;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "copy" is written by the ioctl and must be at the end: the
|
||||||
|
* copy_from_user will not read the last 8 bytes.
|
||||||
|
*/
|
||||||
|
__s64 copy;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct uffdio_zeropage {
|
||||||
|
struct uffdio_range range;
|
||||||
|
#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
|
||||||
|
__u64 mode;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "zeropage" is written by the ioctl and must be at the end:
|
||||||
|
* the copy_from_user will not read the last 8 bytes.
|
||||||
|
*/
|
||||||
|
__s64 zeropage;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _LINUX_USERFAULTFD_H */
|
|
@ -1,7 +1,7 @@
|
||||||
common-obj-y += migration.o tcp.o
|
common-obj-y += migration.o tcp.o
|
||||||
common-obj-y += vmstate.o
|
common-obj-y += vmstate.o
|
||||||
common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
|
common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
|
||||||
common-obj-y += xbzrle.o
|
common-obj-y += xbzrle.o postcopy-ram.o
|
||||||
|
|
||||||
common-obj-$(CONFIG_RDMA) += rdma.o
|
common-obj-$(CONFIG_RDMA) += rdma.o
|
||||||
common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
|
common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
|
||||||
|
|
|
@ -748,7 +748,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
|
static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
|
||||||
|
uint64_t *non_postcopiable_pending,
|
||||||
|
uint64_t *postcopiable_pending)
|
||||||
{
|
{
|
||||||
/* Estimate pending number of bytes to send */
|
/* Estimate pending number of bytes to send */
|
||||||
uint64_t pending;
|
uint64_t pending;
|
||||||
|
@ -767,7 +769,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
|
||||||
qemu_mutex_unlock_iothread();
|
qemu_mutex_unlock_iothread();
|
||||||
|
|
||||||
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
|
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
|
||||||
return pending;
|
/* We don't do postcopy */
|
||||||
|
*non_postcopiable_pending += pending;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int block_load(QEMUFile *f, void *opaque, int version_id)
|
static int block_load(QEMUFile *f, void *opaque, int version_id)
|
||||||
|
@ -876,7 +879,7 @@ static SaveVMHandlers savevm_block_handlers = {
|
||||||
.set_params = block_set_params,
|
.set_params = block_set_params,
|
||||||
.save_live_setup = block_save_setup,
|
.save_live_setup = block_save_setup,
|
||||||
.save_live_iterate = block_save_iterate,
|
.save_live_iterate = block_save_iterate,
|
||||||
.save_live_complete = block_save_complete,
|
.save_live_complete_precopy = block_save_complete,
|
||||||
.save_live_pending = block_save_pending,
|
.save_live_pending = block_save_pending,
|
||||||
.load_state = block_load,
|
.load_state = block_load,
|
||||||
.cleanup = block_migration_cleanup,
|
.cleanup = block_migration_cleanup,
|
||||||
|
|
|
@ -21,15 +21,18 @@
|
||||||
#include "sysemu/sysemu.h"
|
#include "sysemu/sysemu.h"
|
||||||
#include "block/block.h"
|
#include "block/block.h"
|
||||||
#include "qapi/qmp/qerror.h"
|
#include "qapi/qmp/qerror.h"
|
||||||
|
#include "qapi/util.h"
|
||||||
#include "qemu/sockets.h"
|
#include "qemu/sockets.h"
|
||||||
#include "qemu/rcu.h"
|
#include "qemu/rcu.h"
|
||||||
#include "migration/block.h"
|
#include "migration/block.h"
|
||||||
|
#include "migration/postcopy-ram.h"
|
||||||
#include "qemu/thread.h"
|
#include "qemu/thread.h"
|
||||||
#include "qmp-commands.h"
|
#include "qmp-commands.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
#include "qapi/util.h"
|
|
||||||
#include "qapi-event.h"
|
#include "qapi-event.h"
|
||||||
#include "qom/cpu.h"
|
#include "qom/cpu.h"
|
||||||
|
#include "exec/memory.h"
|
||||||
|
#include "exec/address-spaces.h"
|
||||||
|
|
||||||
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
|
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
|
||||||
|
|
||||||
|
@ -57,6 +60,13 @@ static NotifierList migration_state_notifiers =
|
||||||
|
|
||||||
static bool deferred_incoming;
|
static bool deferred_incoming;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Current state of incoming postcopy; note this is not part of
|
||||||
|
* MigrationIncomingState since it's state is used during cleanup
|
||||||
|
* at the end as MIS is being freed.
|
||||||
|
*/
|
||||||
|
static PostcopyState incoming_postcopy_state;
|
||||||
|
|
||||||
/* When we add fault tolerance, we could have several
|
/* When we add fault tolerance, we could have several
|
||||||
migrations at once. For now we don't need to add
|
migrations at once. For now we don't need to add
|
||||||
dynamic creation of migration */
|
dynamic creation of migration */
|
||||||
|
@ -64,6 +74,7 @@ static bool deferred_incoming;
|
||||||
/* For outgoing */
|
/* For outgoing */
|
||||||
MigrationState *migrate_get_current(void)
|
MigrationState *migrate_get_current(void)
|
||||||
{
|
{
|
||||||
|
static bool once;
|
||||||
static MigrationState current_migration = {
|
static MigrationState current_migration = {
|
||||||
.state = MIGRATION_STATUS_NONE,
|
.state = MIGRATION_STATUS_NONE,
|
||||||
.bandwidth_limit = MAX_THROTTLE,
|
.bandwidth_limit = MAX_THROTTLE,
|
||||||
|
@ -81,6 +92,10 @@ MigrationState *migrate_get_current(void)
|
||||||
DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
|
DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (!once) {
|
||||||
|
qemu_mutex_init(&current_migration.src_page_req_mutex);
|
||||||
|
once = true;
|
||||||
|
}
|
||||||
return &current_migration;
|
return &current_migration;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,14 +110,17 @@ MigrationIncomingState *migration_incoming_get_current(void)
|
||||||
MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
|
MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
|
||||||
{
|
{
|
||||||
mis_current = g_new0(MigrationIncomingState, 1);
|
mis_current = g_new0(MigrationIncomingState, 1);
|
||||||
mis_current->file = f;
|
mis_current->from_src_file = f;
|
||||||
QLIST_INIT(&mis_current->loadvm_handlers);
|
QLIST_INIT(&mis_current->loadvm_handlers);
|
||||||
|
qemu_mutex_init(&mis_current->rp_mutex);
|
||||||
|
qemu_event_init(&mis_current->main_thread_load_event, false);
|
||||||
|
|
||||||
return mis_current;
|
return mis_current;
|
||||||
}
|
}
|
||||||
|
|
||||||
void migration_incoming_state_destroy(void)
|
void migration_incoming_state_destroy(void)
|
||||||
{
|
{
|
||||||
|
qemu_event_destroy(&mis_current->main_thread_load_event);
|
||||||
loadvm_free_handlers(mis_current);
|
loadvm_free_handlers(mis_current);
|
||||||
g_free(mis_current);
|
g_free(mis_current);
|
||||||
mis_current = NULL;
|
mis_current = NULL;
|
||||||
|
@ -248,6 +266,35 @@ static void deferred_incoming_migration(Error **errp)
|
||||||
deferred_incoming = true;
|
deferred_incoming = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Request a range of pages from the source VM at the given
|
||||||
|
* start address.
|
||||||
|
* rbname: Name of the RAMBlock to request the page in, if NULL it's the same
|
||||||
|
* as the last request (a name must have been given previously)
|
||||||
|
* Start: Address offset within the RB
|
||||||
|
* Len: Length in bytes required - must be a multiple of pagesize
|
||||||
|
*/
|
||||||
|
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
|
||||||
|
ram_addr_t start, size_t len)
|
||||||
|
{
|
||||||
|
uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname upto 256 */
|
||||||
|
size_t msglen = 12; /* start + len */
|
||||||
|
|
||||||
|
*(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
|
||||||
|
*(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
|
||||||
|
|
||||||
|
if (rbname) {
|
||||||
|
int rbname_len = strlen(rbname);
|
||||||
|
assert(rbname_len < 256);
|
||||||
|
|
||||||
|
bufc[msglen++] = rbname_len;
|
||||||
|
memcpy(bufc + msglen, rbname, rbname_len);
|
||||||
|
msglen += rbname_len;
|
||||||
|
migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
|
||||||
|
} else {
|
||||||
|
migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void qemu_start_incoming_migration(const char *uri, Error **errp)
|
void qemu_start_incoming_migration(const char *uri, Error **errp)
|
||||||
{
|
{
|
||||||
const char *p;
|
const char *p;
|
||||||
|
@ -278,12 +325,37 @@ static void process_incoming_migration_co(void *opaque)
|
||||||
{
|
{
|
||||||
QEMUFile *f = opaque;
|
QEMUFile *f = opaque;
|
||||||
Error *local_err = NULL;
|
Error *local_err = NULL;
|
||||||
|
MigrationIncomingState *mis;
|
||||||
|
PostcopyState ps;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
migration_incoming_state_new(f);
|
mis = migration_incoming_state_new(f);
|
||||||
|
postcopy_state_set(POSTCOPY_INCOMING_NONE);
|
||||||
migrate_generate_event(MIGRATION_STATUS_ACTIVE);
|
migrate_generate_event(MIGRATION_STATUS_ACTIVE);
|
||||||
|
|
||||||
ret = qemu_loadvm_state(f);
|
ret = qemu_loadvm_state(f);
|
||||||
|
|
||||||
|
ps = postcopy_state_get();
|
||||||
|
trace_process_incoming_migration_co_end(ret, ps);
|
||||||
|
if (ps != POSTCOPY_INCOMING_NONE) {
|
||||||
|
if (ps == POSTCOPY_INCOMING_ADVISE) {
|
||||||
|
/*
|
||||||
|
* Where a migration had postcopy enabled (and thus went to advise)
|
||||||
|
* but managed to complete within the precopy period, we can use
|
||||||
|
* the normal exit.
|
||||||
|
*/
|
||||||
|
postcopy_ram_incoming_cleanup(mis);
|
||||||
|
} else if (ret >= 0) {
|
||||||
|
/*
|
||||||
|
* Postcopy was started, cleanup should happen at the end of the
|
||||||
|
* postcopy thread.
|
||||||
|
*/
|
||||||
|
trace_process_incoming_migration_co_postcopy_end_main();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* Else if something went wrong then just fall out of the normal exit */
|
||||||
|
}
|
||||||
|
|
||||||
qemu_fclose(f);
|
qemu_fclose(f);
|
||||||
free_xbzrle_decoded_buf();
|
free_xbzrle_decoded_buf();
|
||||||
migration_incoming_state_destroy();
|
migration_incoming_state_destroy();
|
||||||
|
@ -344,6 +416,50 @@ void process_incoming_migration(QEMUFile *f)
|
||||||
qemu_coroutine_enter(co, f);
|
qemu_coroutine_enter(co, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Send a message on the return channel back to the source
|
||||||
|
* of the migration.
|
||||||
|
*/
|
||||||
|
void migrate_send_rp_message(MigrationIncomingState *mis,
|
||||||
|
enum mig_rp_message_type message_type,
|
||||||
|
uint16_t len, void *data)
|
||||||
|
{
|
||||||
|
trace_migrate_send_rp_message((int)message_type, len);
|
||||||
|
qemu_mutex_lock(&mis->rp_mutex);
|
||||||
|
qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
|
||||||
|
qemu_put_be16(mis->to_src_file, len);
|
||||||
|
qemu_put_buffer(mis->to_src_file, data, len);
|
||||||
|
qemu_fflush(mis->to_src_file);
|
||||||
|
qemu_mutex_unlock(&mis->rp_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Send a 'SHUT' message on the return channel with the given value
|
||||||
|
* to indicate that we've finished with the RP. Non-0 value indicates
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
void migrate_send_rp_shut(MigrationIncomingState *mis,
|
||||||
|
uint32_t value)
|
||||||
|
{
|
||||||
|
uint32_t buf;
|
||||||
|
|
||||||
|
buf = cpu_to_be32(value);
|
||||||
|
migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Send a 'PONG' message on the return channel with the given value
|
||||||
|
* (normally in response to a 'PING')
|
||||||
|
*/
|
||||||
|
void migrate_send_rp_pong(MigrationIncomingState *mis,
|
||||||
|
uint32_t value)
|
||||||
|
{
|
||||||
|
uint32_t buf;
|
||||||
|
|
||||||
|
buf = cpu_to_be32(value);
|
||||||
|
migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
|
||||||
|
}
|
||||||
|
|
||||||
/* amount of nanoseconds we are willing to wait for migration to be down.
|
/* amount of nanoseconds we are willing to wait for migration to be down.
|
||||||
* the choice of nanoseconds is because it is the maximum resolution that
|
* the choice of nanoseconds is because it is the maximum resolution that
|
||||||
* get_clock() can achieve. It is an internal measure. All user-visible
|
* get_clock() can achieve. It is an internal measure. All user-visible
|
||||||
|
@ -399,6 +515,24 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return true if we're already in the middle of a migration
|
||||||
|
* (i.e. any of the active or setup states)
|
||||||
|
*/
|
||||||
|
static bool migration_is_setup_or_active(int state)
|
||||||
|
{
|
||||||
|
switch (state) {
|
||||||
|
case MIGRATION_STATUS_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
case MIGRATION_STATUS_SETUP:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void get_xbzrle_cache_stats(MigrationInfo *info)
|
static void get_xbzrle_cache_stats(MigrationInfo *info)
|
||||||
{
|
{
|
||||||
if (migrate_use_xbzrle()) {
|
if (migrate_use_xbzrle()) {
|
||||||
|
@ -463,6 +597,39 @@ MigrationInfo *qmp_query_migrate(Error **errp)
|
||||||
info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
|
info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
get_xbzrle_cache_stats(info);
|
||||||
|
break;
|
||||||
|
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
|
||||||
|
/* Mostly the same as active; TODO add some postcopy stats */
|
||||||
|
info->has_status = true;
|
||||||
|
info->has_total_time = true;
|
||||||
|
info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
|
||||||
|
- s->total_time;
|
||||||
|
info->has_expected_downtime = true;
|
||||||
|
info->expected_downtime = s->expected_downtime;
|
||||||
|
info->has_setup_time = true;
|
||||||
|
info->setup_time = s->setup_time;
|
||||||
|
|
||||||
|
info->has_ram = true;
|
||||||
|
info->ram = g_malloc0(sizeof(*info->ram));
|
||||||
|
info->ram->transferred = ram_bytes_transferred();
|
||||||
|
info->ram->remaining = ram_bytes_remaining();
|
||||||
|
info->ram->total = ram_bytes_total();
|
||||||
|
info->ram->duplicate = dup_mig_pages_transferred();
|
||||||
|
info->ram->skipped = skipped_mig_pages_transferred();
|
||||||
|
info->ram->normal = norm_mig_pages_transferred();
|
||||||
|
info->ram->normal_bytes = norm_mig_bytes_transferred();
|
||||||
|
info->ram->dirty_pages_rate = s->dirty_pages_rate;
|
||||||
|
info->ram->mbps = s->mbps;
|
||||||
|
|
||||||
|
if (blk_mig_active()) {
|
||||||
|
info->has_disk = true;
|
||||||
|
info->disk = g_malloc0(sizeof(*info->disk));
|
||||||
|
info->disk->transferred = blk_mig_bytes_transferred();
|
||||||
|
info->disk->remaining = blk_mig_bytes_remaining();
|
||||||
|
info->disk->total = blk_mig_bytes_total();
|
||||||
|
}
|
||||||
|
|
||||||
get_xbzrle_cache_stats(info);
|
get_xbzrle_cache_stats(info);
|
||||||
break;
|
break;
|
||||||
case MIGRATION_STATUS_COMPLETED:
|
case MIGRATION_STATUS_COMPLETED:
|
||||||
|
@ -506,8 +673,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
|
||||||
MigrationState *s = migrate_get_current();
|
MigrationState *s = migrate_get_current();
|
||||||
MigrationCapabilityStatusList *cap;
|
MigrationCapabilityStatusList *cap;
|
||||||
|
|
||||||
if (s->state == MIGRATION_STATUS_ACTIVE ||
|
if (migration_is_setup_or_active(s->state)) {
|
||||||
s->state == MIGRATION_STATUS_SETUP) {
|
|
||||||
error_setg(errp, QERR_MIGRATION_ACTIVE);
|
error_setg(errp, QERR_MIGRATION_ACTIVE);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -515,6 +681,20 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
|
||||||
for (cap = params; cap; cap = cap->next) {
|
for (cap = params; cap; cap = cap->next) {
|
||||||
s->enabled_capabilities[cap->value->capability] = cap->value->state;
|
s->enabled_capabilities[cap->value->capability] = cap->value->state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (migrate_postcopy_ram()) {
|
||||||
|
if (migrate_use_compression()) {
|
||||||
|
/* The decompression threads asynchronously write into RAM
|
||||||
|
* rather than use the atomic copies needed to avoid
|
||||||
|
* userfaulting. It should be possible to fix the decompression
|
||||||
|
* threads for compatibility in future.
|
||||||
|
*/
|
||||||
|
error_report("Postcopy is not currently compatible with "
|
||||||
|
"compression");
|
||||||
|
s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
|
||||||
|
false;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void qmp_migrate_set_parameters(bool has_compress_level,
|
void qmp_migrate_set_parameters(bool has_compress_level,
|
||||||
|
@ -583,6 +763,28 @@ void qmp_migrate_set_parameters(bool has_compress_level,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void qmp_migrate_start_postcopy(Error **errp)
|
||||||
|
{
|
||||||
|
MigrationState *s = migrate_get_current();
|
||||||
|
|
||||||
|
if (!migrate_postcopy_ram()) {
|
||||||
|
error_setg(errp, "Enable postcopy with migration_set_capability before"
|
||||||
|
" the start of migration");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s->state == MIGRATION_STATUS_NONE) {
|
||||||
|
error_setg(errp, "Postcopy must be started after migration has been"
|
||||||
|
" started");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* we don't error if migration has finished since that would be racy
|
||||||
|
* with issuing this command.
|
||||||
|
*/
|
||||||
|
atomic_set(&s->start_postcopy, true);
|
||||||
|
}
|
||||||
|
|
||||||
/* shared migration helpers */
|
/* shared migration helpers */
|
||||||
|
|
||||||
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
|
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
|
||||||
|
@ -600,10 +802,15 @@ static void migrate_fd_cleanup(void *opaque)
|
||||||
qemu_bh_delete(s->cleanup_bh);
|
qemu_bh_delete(s->cleanup_bh);
|
||||||
s->cleanup_bh = NULL;
|
s->cleanup_bh = NULL;
|
||||||
|
|
||||||
|
flush_page_queue(s);
|
||||||
|
|
||||||
if (s->file) {
|
if (s->file) {
|
||||||
trace_migrate_fd_cleanup();
|
trace_migrate_fd_cleanup();
|
||||||
qemu_mutex_unlock_iothread();
|
qemu_mutex_unlock_iothread();
|
||||||
|
if (s->migration_thread_running) {
|
||||||
qemu_thread_join(&s->thread);
|
qemu_thread_join(&s->thread);
|
||||||
|
s->migration_thread_running = false;
|
||||||
|
}
|
||||||
qemu_mutex_lock_iothread();
|
qemu_mutex_lock_iothread();
|
||||||
|
|
||||||
migrate_compress_threads_join();
|
migrate_compress_threads_join();
|
||||||
|
@ -611,7 +818,8 @@ static void migrate_fd_cleanup(void *opaque)
|
||||||
s->file = NULL;
|
s->file = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(s->state != MIGRATION_STATUS_ACTIVE);
|
assert((s->state != MIGRATION_STATUS_ACTIVE) &&
|
||||||
|
(s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
|
||||||
|
|
||||||
if (s->state == MIGRATION_STATUS_CANCELLING) {
|
if (s->state == MIGRATION_STATUS_CANCELLING) {
|
||||||
migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
|
migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
|
||||||
|
@ -635,10 +843,14 @@ static void migrate_fd_cancel(MigrationState *s)
|
||||||
QEMUFile *f = migrate_get_current()->file;
|
QEMUFile *f = migrate_get_current()->file;
|
||||||
trace_migrate_fd_cancel();
|
trace_migrate_fd_cancel();
|
||||||
|
|
||||||
|
if (s->rp_state.from_dst_file) {
|
||||||
|
/* shutdown the rp socket, so causing the rp thread to shutdown */
|
||||||
|
qemu_file_shutdown(s->rp_state.from_dst_file);
|
||||||
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
old_state = s->state;
|
old_state = s->state;
|
||||||
if (old_state != MIGRATION_STATUS_SETUP &&
|
if (!migration_is_setup_or_active(old_state)) {
|
||||||
old_state != MIGRATION_STATUS_ACTIVE) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
|
migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
|
||||||
|
@ -682,7 +894,12 @@ bool migration_has_failed(MigrationState *s)
|
||||||
s->state == MIGRATION_STATUS_FAILED);
|
s->state == MIGRATION_STATUS_FAILED);
|
||||||
}
|
}
|
||||||
|
|
||||||
static MigrationState *migrate_init(const MigrationParams *params)
|
bool migration_in_postcopy(MigrationState *s)
|
||||||
|
{
|
||||||
|
return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
||||||
|
}
|
||||||
|
|
||||||
|
MigrationState *migrate_init(const MigrationParams *params)
|
||||||
{
|
{
|
||||||
MigrationState *s = migrate_get_current();
|
MigrationState *s = migrate_get_current();
|
||||||
int64_t bandwidth_limit = s->bandwidth_limit;
|
int64_t bandwidth_limit = s->bandwidth_limit;
|
||||||
|
@ -719,6 +936,8 @@ static MigrationState *migrate_init(const MigrationParams *params)
|
||||||
s->bandwidth_limit = bandwidth_limit;
|
s->bandwidth_limit = bandwidth_limit;
|
||||||
migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
|
migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
|
||||||
|
|
||||||
|
QSIMPLEQ_INIT(&s->src_page_requests);
|
||||||
|
|
||||||
s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
@ -770,8 +989,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
|
||||||
params.blk = has_blk && blk;
|
params.blk = has_blk && blk;
|
||||||
params.shared = has_inc && inc;
|
params.shared = has_inc && inc;
|
||||||
|
|
||||||
if (s->state == MIGRATION_STATUS_ACTIVE ||
|
if (migration_is_setup_or_active(s->state) ||
|
||||||
s->state == MIGRATION_STATUS_SETUP ||
|
|
||||||
s->state == MIGRATION_STATUS_CANCELLING) {
|
s->state == MIGRATION_STATUS_CANCELLING) {
|
||||||
error_setg(errp, QERR_MIGRATION_ACTIVE);
|
error_setg(errp, QERR_MIGRATION_ACTIVE);
|
||||||
return;
|
return;
|
||||||
|
@ -890,6 +1108,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
|
||||||
max_downtime = (uint64_t)value;
|
max_downtime = (uint64_t)value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool migrate_postcopy_ram(void)
|
||||||
|
{
|
||||||
|
MigrationState *s;
|
||||||
|
|
||||||
|
s = migrate_get_current();
|
||||||
|
|
||||||
|
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
|
||||||
|
}
|
||||||
|
|
||||||
bool migrate_auto_converge(void)
|
bool migrate_auto_converge(void)
|
||||||
{
|
{
|
||||||
MigrationState *s;
|
MigrationState *s;
|
||||||
|
@ -971,30 +1198,347 @@ int64_t migrate_xbzrle_cache_size(void)
|
||||||
return s->xbzrle_cache_size;
|
return s->xbzrle_cache_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* migration thread support */
|
||||||
|
/*
|
||||||
|
* Something bad happened to the RP stream, mark an error
|
||||||
|
* The caller shall print or trace something to indicate why
|
||||||
|
*/
|
||||||
|
static void mark_source_rp_bad(MigrationState *s)
|
||||||
|
{
|
||||||
|
s->rp_state.error = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct rp_cmd_args {
|
||||||
|
ssize_t len; /* -1 = variable */
|
||||||
|
const char *name;
|
||||||
|
} rp_cmd_args[] = {
|
||||||
|
[MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" },
|
||||||
|
[MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" },
|
||||||
|
[MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" },
|
||||||
|
[MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" },
|
||||||
|
[MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
|
||||||
|
[MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Process a request for pages received on the return path,
|
||||||
|
* We're allowed to send more than requested (e.g. to round to our page size)
|
||||||
|
* and we don't need to send pages that have already been sent.
|
||||||
|
*/
|
||||||
|
static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
|
||||||
|
ram_addr_t start, size_t len)
|
||||||
|
{
|
||||||
|
long our_host_ps = getpagesize();
|
||||||
|
|
||||||
|
trace_migrate_handle_rp_req_pages(rbname, start, len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since we currently insist on matching page sizes, just sanity check
|
||||||
|
* we're being asked for whole host pages.
|
||||||
|
*/
|
||||||
|
if (start & (our_host_ps-1) ||
|
||||||
|
(len & (our_host_ps-1))) {
|
||||||
|
error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
|
||||||
|
" len: %zd", __func__, start, len);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ram_save_queue_pages(ms, rbname, start, len)) {
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handles messages sent on the return path towards the source VM
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static void *source_return_path_thread(void *opaque)
|
||||||
|
{
|
||||||
|
MigrationState *ms = opaque;
|
||||||
|
QEMUFile *rp = ms->rp_state.from_dst_file;
|
||||||
|
uint16_t header_len, header_type;
|
||||||
|
const int max_len = 512;
|
||||||
|
uint8_t buf[max_len];
|
||||||
|
uint32_t tmp32, sibling_error;
|
||||||
|
ram_addr_t start = 0; /* =0 to silence warning */
|
||||||
|
size_t len = 0, expected_len;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
trace_source_return_path_thread_entry();
|
||||||
|
while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
|
||||||
|
migration_is_setup_or_active(ms->state)) {
|
||||||
|
trace_source_return_path_thread_loop_top();
|
||||||
|
header_type = qemu_get_be16(rp);
|
||||||
|
header_len = qemu_get_be16(rp);
|
||||||
|
|
||||||
|
if (header_type >= MIG_RP_MSG_MAX ||
|
||||||
|
header_type == MIG_RP_MSG_INVALID) {
|
||||||
|
error_report("RP: Received invalid message 0x%04x length 0x%04x",
|
||||||
|
header_type, header_len);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((rp_cmd_args[header_type].len != -1 &&
|
||||||
|
header_len != rp_cmd_args[header_type].len) ||
|
||||||
|
header_len > max_len) {
|
||||||
|
error_report("RP: Received '%s' message (0x%04x) with"
|
||||||
|
"incorrect length %d expecting %zu",
|
||||||
|
rp_cmd_args[header_type].name, header_type, header_len,
|
||||||
|
(size_t)rp_cmd_args[header_type].len);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We know we've got a valid header by this point */
|
||||||
|
res = qemu_get_buffer(rp, buf, header_len);
|
||||||
|
if (res != header_len) {
|
||||||
|
error_report("RP: Failed reading data for message 0x%04x"
|
||||||
|
" read %d expected %d",
|
||||||
|
header_type, res, header_len);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* OK, we have the message and the data */
|
||||||
|
switch (header_type) {
|
||||||
|
case MIG_RP_MSG_SHUT:
|
||||||
|
sibling_error = be32_to_cpup((uint32_t *)buf);
|
||||||
|
trace_source_return_path_thread_shut(sibling_error);
|
||||||
|
if (sibling_error) {
|
||||||
|
error_report("RP: Sibling indicated error %d", sibling_error);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* We'll let the main thread deal with closing the RP
|
||||||
|
* we could do a shutdown(2) on it, but we're the only user
|
||||||
|
* anyway, so there's nothing gained.
|
||||||
|
*/
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
case MIG_RP_MSG_PONG:
|
||||||
|
tmp32 = be32_to_cpup((uint32_t *)buf);
|
||||||
|
trace_source_return_path_thread_pong(tmp32);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MIG_RP_MSG_REQ_PAGES:
|
||||||
|
start = be64_to_cpup((uint64_t *)buf);
|
||||||
|
len = be32_to_cpup((uint32_t *)(buf + 8));
|
||||||
|
migrate_handle_rp_req_pages(ms, NULL, start, len);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MIG_RP_MSG_REQ_PAGES_ID:
|
||||||
|
expected_len = 12 + 1; /* header + termination */
|
||||||
|
|
||||||
|
if (header_len >= expected_len) {
|
||||||
|
start = be64_to_cpup((uint64_t *)buf);
|
||||||
|
len = be32_to_cpup((uint32_t *)(buf + 8));
|
||||||
|
/* Now we expect an idstr */
|
||||||
|
tmp32 = buf[12]; /* Length of the following idstr */
|
||||||
|
buf[13 + tmp32] = '\0';
|
||||||
|
expected_len += tmp32;
|
||||||
|
}
|
||||||
|
if (header_len != expected_len) {
|
||||||
|
error_report("RP: Req_Page_id with length %d expecting %zd",
|
||||||
|
header_len, expected_len);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rp && qemu_file_get_error(rp)) {
|
||||||
|
trace_source_return_path_thread_bad_end();
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_source_return_path_thread_end();
|
||||||
|
out:
|
||||||
|
ms->rp_state.from_dst_file = NULL;
|
||||||
|
qemu_fclose(rp);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int open_return_path_on_source(MigrationState *ms)
|
||||||
|
{
|
||||||
|
|
||||||
|
ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->file);
|
||||||
|
if (!ms->rp_state.from_dst_file) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_open_return_path_on_source();
|
||||||
|
qemu_thread_create(&ms->rp_state.rp_thread, "return path",
|
||||||
|
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
|
||||||
|
|
||||||
|
trace_open_return_path_on_source_continue();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
|
||||||
|
static int await_return_path_close_on_source(MigrationState *ms)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If this is a normal exit then the destination will send a SHUT and the
|
||||||
|
* rp_thread will exit, however if there's an error we need to cause
|
||||||
|
* it to exit.
|
||||||
|
*/
|
||||||
|
if (qemu_file_get_error(ms->file) && ms->rp_state.from_dst_file) {
|
||||||
|
/*
|
||||||
|
* shutdown(2), if we have it, will cause it to unblock if it's stuck
|
||||||
|
* waiting for the destination.
|
||||||
|
*/
|
||||||
|
qemu_file_shutdown(ms->rp_state.from_dst_file);
|
||||||
|
mark_source_rp_bad(ms);
|
||||||
|
}
|
||||||
|
trace_await_return_path_close_on_source_joining();
|
||||||
|
qemu_thread_join(&ms->rp_state.rp_thread);
|
||||||
|
trace_await_return_path_close_on_source_close();
|
||||||
|
return ms->rp_state.error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Switch from normal iteration to postcopy
|
||||||
|
* Returns non-0 on error
|
||||||
|
*/
|
||||||
|
static int postcopy_start(MigrationState *ms, bool *old_vm_running)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
const QEMUSizedBuffer *qsb;
|
||||||
|
int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
|
migrate_set_state(ms, MIGRATION_STATUS_ACTIVE,
|
||||||
|
MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
||||||
|
|
||||||
|
trace_postcopy_start();
|
||||||
|
qemu_mutex_lock_iothread();
|
||||||
|
trace_postcopy_start_set_run();
|
||||||
|
|
||||||
|
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
|
||||||
|
*old_vm_running = runstate_is_running();
|
||||||
|
global_state_store();
|
||||||
|
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* in Finish migrate and with the io-lock held everything should
|
||||||
|
* be quiet, but we've potentially still got dirty pages and we
|
||||||
|
* need to tell the destination to throw any pages it's already received
|
||||||
|
* that are dirty
|
||||||
|
*/
|
||||||
|
if (ram_postcopy_send_discard_bitmap(ms)) {
|
||||||
|
error_report("postcopy send discard bitmap failed");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* send rest of state - note things that are doing postcopy
|
||||||
|
* will notice we're in POSTCOPY_ACTIVE and not actually
|
||||||
|
* wrap their state up here
|
||||||
|
*/
|
||||||
|
qemu_file_set_rate_limit(ms->file, INT64_MAX);
|
||||||
|
/* Ping just for debugging, helps line traces up */
|
||||||
|
qemu_savevm_send_ping(ms->file, 2);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* While loading the device state we may trigger page transfer
|
||||||
|
* requests and the fd must be free to process those, and thus
|
||||||
|
* the destination must read the whole device state off the fd before
|
||||||
|
* it starts processing it. Unfortunately the ad-hoc migration format
|
||||||
|
* doesn't allow the destination to know the size to read without fully
|
||||||
|
* parsing it through each devices load-state code (especially the open
|
||||||
|
* coded devices that use get/put).
|
||||||
|
* So we wrap the device state up in a package with a length at the start;
|
||||||
|
* to do this we use a qemu_buf to hold the whole of the device state.
|
||||||
|
*/
|
||||||
|
QEMUFile *fb = qemu_bufopen("w", NULL);
|
||||||
|
if (!fb) {
|
||||||
|
error_report("Failed to create buffered file");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure the receiver can get incoming pages before we send the rest
|
||||||
|
* of the state
|
||||||
|
*/
|
||||||
|
qemu_savevm_send_postcopy_listen(fb);
|
||||||
|
|
||||||
|
qemu_savevm_state_complete_precopy(fb);
|
||||||
|
qemu_savevm_send_ping(fb, 3);
|
||||||
|
|
||||||
|
qemu_savevm_send_postcopy_run(fb);
|
||||||
|
|
||||||
|
/* <><> end of stuff going into the package */
|
||||||
|
qsb = qemu_buf_get(fb);
|
||||||
|
|
||||||
|
/* Now send that blob */
|
||||||
|
if (qemu_savevm_send_packaged(ms->file, qsb)) {
|
||||||
|
goto fail_closefb;
|
||||||
|
}
|
||||||
|
qemu_fclose(fb);
|
||||||
|
ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
|
||||||
|
|
||||||
|
qemu_mutex_unlock_iothread();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Although this ping is just for debug, it could potentially be
|
||||||
|
* used for getting a better measurement of downtime at the source.
|
||||||
|
*/
|
||||||
|
qemu_savevm_send_ping(ms->file, 4);
|
||||||
|
|
||||||
|
ret = qemu_file_get_error(ms->file);
|
||||||
|
if (ret) {
|
||||||
|
error_report("postcopy_start: Migration stream errored");
|
||||||
|
migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
|
||||||
|
MIGRATION_STATUS_FAILED);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
fail_closefb:
|
||||||
|
qemu_fclose(fb);
|
||||||
|
fail:
|
||||||
|
migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
|
||||||
|
MIGRATION_STATUS_FAILED);
|
||||||
|
qemu_mutex_unlock_iothread();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* migration_completion: Used by migration_thread when there's not much left.
|
* migration_completion: Used by migration_thread when there's not much left.
|
||||||
* The caller 'breaks' the loop when this returns.
|
* The caller 'breaks' the loop when this returns.
|
||||||
*
|
*
|
||||||
* @s: Current migration state
|
* @s: Current migration state
|
||||||
|
* @current_active_state: The migration state we expect to be in
|
||||||
* @*old_vm_running: Pointer to old_vm_running flag
|
* @*old_vm_running: Pointer to old_vm_running flag
|
||||||
* @*start_time: Pointer to time to update
|
* @*start_time: Pointer to time to update
|
||||||
*/
|
*/
|
||||||
static void migration_completion(MigrationState *s, bool *old_vm_running,
|
static void migration_completion(MigrationState *s, int current_active_state,
|
||||||
|
bool *old_vm_running,
|
||||||
int64_t *start_time)
|
int64_t *start_time)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
if (s->state == MIGRATION_STATUS_ACTIVE) {
|
||||||
qemu_mutex_lock_iothread();
|
qemu_mutex_lock_iothread();
|
||||||
*start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
*start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
|
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
|
||||||
*old_vm_running = runstate_is_running();
|
*old_vm_running = runstate_is_running();
|
||||||
|
|
||||||
ret = global_state_store();
|
ret = global_state_store();
|
||||||
|
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
|
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
|
||||||
if (ret >= 0) {
|
if (ret >= 0) {
|
||||||
qemu_file_set_rate_limit(s->file, INT64_MAX);
|
qemu_file_set_rate_limit(s->file, INT64_MAX);
|
||||||
qemu_savevm_state_complete(s->file);
|
qemu_savevm_state_complete_precopy(s->file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
qemu_mutex_unlock_iothread();
|
qemu_mutex_unlock_iothread();
|
||||||
|
@ -1002,24 +1546,50 @@ static void migration_completion(MigrationState *s, bool *old_vm_running,
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
} else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
|
||||||
|
trace_migration_completion_postcopy_end();
|
||||||
|
|
||||||
|
qemu_savevm_state_complete_postcopy(s->file);
|
||||||
|
trace_migration_completion_postcopy_end_after_complete();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If rp was opened we must clean up the thread before
|
||||||
|
* cleaning everything else up (since if there are no failures
|
||||||
|
* it will wait for the destination to send its status in
|
||||||
|
* a SHUT command).
|
||||||
|
* Postcopy opens rp if enabled (even if it's not activated)
|
||||||
|
*/
|
||||||
|
if (migrate_postcopy_ram()) {
|
||||||
|
int rp_error;
|
||||||
|
trace_migration_completion_postcopy_end_before_rp();
|
||||||
|
rp_error = await_return_path_close_on_source(s);
|
||||||
|
trace_migration_completion_postcopy_end_after_rp(rp_error);
|
||||||
|
if (rp_error) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (qemu_file_get_error(s->file)) {
|
if (qemu_file_get_error(s->file)) {
|
||||||
trace_migration_completion_file_err();
|
trace_migration_completion_file_err();
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED);
|
migrate_set_state(s, current_active_state, MIGRATION_STATUS_COMPLETED);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED);
|
migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* migration thread support */
|
/*
|
||||||
|
* Master migration thread on the source VM.
|
||||||
|
* It drives the migration and pumps the data down the outgoing channel.
|
||||||
|
*/
|
||||||
static void *migration_thread(void *opaque)
|
static void *migration_thread(void *opaque)
|
||||||
{
|
{
|
||||||
MigrationState *s = opaque;
|
MigrationState *s = opaque;
|
||||||
|
/* Used by the bandwidth calcs, updated later */
|
||||||
int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
||||||
int64_t initial_bytes = 0;
|
int64_t initial_bytes = 0;
|
||||||
|
@ -1027,34 +1597,79 @@ static void *migration_thread(void *opaque)
|
||||||
int64_t start_time = initial_time;
|
int64_t start_time = initial_time;
|
||||||
int64_t end_time;
|
int64_t end_time;
|
||||||
bool old_vm_running = false;
|
bool old_vm_running = false;
|
||||||
|
bool entered_postcopy = false;
|
||||||
|
/* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
|
||||||
|
enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
|
||||||
|
|
||||||
rcu_register_thread();
|
rcu_register_thread();
|
||||||
|
|
||||||
qemu_savevm_state_header(s->file);
|
qemu_savevm_state_header(s->file);
|
||||||
|
|
||||||
|
if (migrate_postcopy_ram()) {
|
||||||
|
/* Now tell the dest that it should open its end so it can reply */
|
||||||
|
qemu_savevm_send_open_return_path(s->file);
|
||||||
|
|
||||||
|
/* And do a ping that will make stuff easier to debug */
|
||||||
|
qemu_savevm_send_ping(s->file, 1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tell the destination that we *might* want to do postcopy later;
|
||||||
|
* if the other end can't do postcopy it should fail now, nice and
|
||||||
|
* early.
|
||||||
|
*/
|
||||||
|
qemu_savevm_send_postcopy_advise(s->file);
|
||||||
|
}
|
||||||
|
|
||||||
qemu_savevm_state_begin(s->file, &s->params);
|
qemu_savevm_state_begin(s->file, &s->params);
|
||||||
|
|
||||||
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
|
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
|
||||||
|
current_active_state = MIGRATION_STATUS_ACTIVE;
|
||||||
migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);
|
migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);
|
||||||
|
|
||||||
while (s->state == MIGRATION_STATUS_ACTIVE) {
|
trace_migration_thread_setup_complete();
|
||||||
|
|
||||||
|
while (s->state == MIGRATION_STATUS_ACTIVE ||
|
||||||
|
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
|
||||||
int64_t current_time;
|
int64_t current_time;
|
||||||
uint64_t pending_size;
|
uint64_t pending_size;
|
||||||
|
|
||||||
if (!qemu_file_rate_limit(s->file)) {
|
if (!qemu_file_rate_limit(s->file)) {
|
||||||
pending_size = qemu_savevm_state_pending(s->file, max_size);
|
uint64_t pend_post, pend_nonpost;
|
||||||
trace_migrate_pending(pending_size, max_size);
|
|
||||||
|
qemu_savevm_state_pending(s->file, max_size, &pend_nonpost,
|
||||||
|
&pend_post);
|
||||||
|
pending_size = pend_nonpost + pend_post;
|
||||||
|
trace_migrate_pending(pending_size, max_size,
|
||||||
|
pend_post, pend_nonpost);
|
||||||
if (pending_size && pending_size >= max_size) {
|
if (pending_size && pending_size >= max_size) {
|
||||||
qemu_savevm_state_iterate(s->file);
|
/* Still a significant amount to transfer */
|
||||||
|
|
||||||
|
current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
|
if (migrate_postcopy_ram() &&
|
||||||
|
s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
|
||||||
|
pend_nonpost <= max_size &&
|
||||||
|
atomic_read(&s->start_postcopy)) {
|
||||||
|
|
||||||
|
if (!postcopy_start(s, &old_vm_running)) {
|
||||||
|
current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
|
||||||
|
entered_postcopy = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* Just another iteration step */
|
||||||
|
qemu_savevm_state_iterate(s->file, entered_postcopy);
|
||||||
} else {
|
} else {
|
||||||
trace_migration_thread_low_pending(pending_size);
|
trace_migration_thread_low_pending(pending_size);
|
||||||
migration_completion(s, &old_vm_running, &start_time);
|
migration_completion(s, current_active_state,
|
||||||
|
&old_vm_running, &start_time);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (qemu_file_get_error(s->file)) {
|
if (qemu_file_get_error(s->file)) {
|
||||||
migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
|
migrate_set_state(s, current_active_state, MIGRATION_STATUS_FAILED);
|
||||||
MIGRATION_STATUS_FAILED);
|
trace_migration_thread_file_err();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
|
@ -1085,6 +1700,7 @@ static void *migration_thread(void *opaque)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trace_migration_thread_after_loop();
|
||||||
/* If we enabled cpu throttling for auto-converge, turn it off. */
|
/* If we enabled cpu throttling for auto-converge, turn it off. */
|
||||||
cpu_throttle_stop();
|
cpu_throttle_stop();
|
||||||
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
|
||||||
|
@ -1094,14 +1710,16 @@ static void *migration_thread(void *opaque)
|
||||||
if (s->state == MIGRATION_STATUS_COMPLETED) {
|
if (s->state == MIGRATION_STATUS_COMPLETED) {
|
||||||
uint64_t transferred_bytes = qemu_ftell(s->file);
|
uint64_t transferred_bytes = qemu_ftell(s->file);
|
||||||
s->total_time = end_time - s->total_time;
|
s->total_time = end_time - s->total_time;
|
||||||
|
if (!entered_postcopy) {
|
||||||
s->downtime = end_time - start_time;
|
s->downtime = end_time - start_time;
|
||||||
|
}
|
||||||
if (s->total_time) {
|
if (s->total_time) {
|
||||||
s->mbps = (((double) transferred_bytes * 8.0) /
|
s->mbps = (((double) transferred_bytes * 8.0) /
|
||||||
((double) s->total_time)) / 1000;
|
((double) s->total_time)) / 1000;
|
||||||
}
|
}
|
||||||
runstate_set(RUN_STATE_POSTMIGRATE);
|
runstate_set(RUN_STATE_POSTMIGRATE);
|
||||||
} else {
|
} else {
|
||||||
if (old_vm_running) {
|
if (old_vm_running && !entered_postcopy) {
|
||||||
vm_start();
|
vm_start();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1124,7 +1742,34 @@ void migrate_fd_connect(MigrationState *s)
|
||||||
/* Notify before starting migration thread */
|
/* Notify before starting migration thread */
|
||||||
notifier_list_notify(&migration_state_notifiers, s);
|
notifier_list_notify(&migration_state_notifiers, s);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Open the return path; currently for postcopy but other things might
|
||||||
|
* also want it.
|
||||||
|
*/
|
||||||
|
if (migrate_postcopy_ram()) {
|
||||||
|
if (open_return_path_on_source(s)) {
|
||||||
|
error_report("Unable to open return-path for postcopy");
|
||||||
|
migrate_set_state(s, MIGRATION_STATUS_SETUP,
|
||||||
|
MIGRATION_STATUS_FAILED);
|
||||||
|
migrate_fd_cleanup(s);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
migrate_compress_threads_create();
|
migrate_compress_threads_create();
|
||||||
qemu_thread_create(&s->thread, "migration", migration_thread, s,
|
qemu_thread_create(&s->thread, "migration", migration_thread, s,
|
||||||
QEMU_THREAD_JOINABLE);
|
QEMU_THREAD_JOINABLE);
|
||||||
|
s->migration_thread_running = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PostcopyState postcopy_state_get(void)
|
||||||
|
{
|
||||||
|
return atomic_mb_read(&incoming_postcopy_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the state and return the old state */
|
||||||
|
PostcopyState postcopy_state_set(PostcopyState new_state)
|
||||||
|
{
|
||||||
|
return atomic_xchg(&incoming_postcopy_state, new_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
767
migration/postcopy-ram.c
Normal file
767
migration/postcopy-ram.c
Normal file
|
@ -0,0 +1,767 @@
|
||||||
|
/*
|
||||||
|
* Postcopy migration for RAM
|
||||||
|
*
|
||||||
|
* Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Dave Gilbert <dgilbert@redhat.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||||
|
* See the COPYING file in the top-level directory.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Postcopy is a migration technique where the execution flips from the
|
||||||
|
* source to the destination before all the data has been copied.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <glib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "qemu-common.h"
|
||||||
|
#include "migration/migration.h"
|
||||||
|
#include "migration/postcopy-ram.h"
|
||||||
|
#include "sysemu/sysemu.h"
|
||||||
|
#include "sysemu/balloon.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
|
/*
 * Upper bound on the number of discard ranges carried by one discard
 * command.  The value is arbitrary; it keeps each command around ~200 bytes.
 */
#define MAX_DISCARDS_PER_COMMAND 12

/*
 * Bookkeeping for the discard ranges of a single RAMBlock.
 */
struct PostcopyDiscardState {
    const char *ramblock_name;
    /* Bitmap entry for the 1st bit of this RAMBlock */
    uint64_t offset;
    /* NOTE(review): presumably the next free slot in the lists below */
    uint16_t cur_entry;
    /*
     * Parallel arrays: start and length of each discard range (bytes)
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
    /* NOTE(review): look like statistics counters - confirm against users */
    unsigned int nsentwords;
    unsigned int nsentcmds;
};
|
||||||
|
|
||||||
|
/* Postcopy needs to detect accesses to pages that haven't yet been copied
|
||||||
|
* across, and efficiently map new pages in, the techniques for doing this
|
||||||
|
* are target OS specific.
|
||||||
|
*/
|
||||||
|
#if defined(__linux__)
|
||||||
|
|
||||||
|
#include <poll.h>
|
||||||
|
#include <sys/eventfd.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <asm/types.h> /* for __u64 */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(__NR_userfaultfd)
|
||||||
|
#include <linux/userfaultfd.h>
|
||||||
|
|
||||||
|
/*
 * Run the UFFDIO_API handshake on @ufd and verify that the kernel
 * advertises both the register and unregister ioctls.
 *
 * @ufd: userfaultfd file descriptor to check
 *
 * Returns: true when the handshake succeeds and both ioctls are present;
 *          false otherwise (an error has been reported).
 */
static bool ufd_version_check(int ufd)
{
    const uint64_t required = (__u64)1 << _UFFDIO_REGISTER |
                              (__u64)1 << _UFFDIO_UNREGISTER;
    struct uffdio_api handshake;
    uint64_t absent;

    handshake.api = UFFD_API;
    handshake.features = 0;
    if (ioctl(ufd, UFFDIO_API, &handshake) != 0) {
        error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
                     strerror(errno));
        return false;
    }

    /* Bits we need that the kernel did not offer */
    absent = (uint64_t)(~handshake.ioctls & required);
    if (absent == 0) {
        return true;
    }

    error_report("Missing userfault features: %" PRIx64, absent);
    return false;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: This has the side effect of munlock'ing all of RAM, that's
|
||||||
|
* normally fine since if the postcopy succeeds it gets turned back on at the
|
||||||
|
* end.
|
||||||
|
*/
|
||||||
|
bool postcopy_ram_supported_by_host(void)
|
||||||
|
{
|
||||||
|
long pagesize = getpagesize();
|
||||||
|
int ufd = -1;
|
||||||
|
bool ret = false; /* Error unless we change it */
|
||||||
|
void *testarea = NULL;
|
||||||
|
struct uffdio_register reg_struct;
|
||||||
|
struct uffdio_range range_struct;
|
||||||
|
uint64_t feature_mask;
|
||||||
|
|
||||||
|
if ((1ul << qemu_target_page_bits()) > pagesize) {
|
||||||
|
error_report("Target page size bigger than host page size");
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
|
||||||
|
if (ufd == -1) {
|
||||||
|
error_report("%s: userfaultfd not available: %s", __func__,
|
||||||
|
strerror(errno));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Version and features check */
|
||||||
|
if (!ufd_version_check(ufd)) {
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* userfault and mlock don't go together; we'll put it back later if
|
||||||
|
* it was enabled.
|
||||||
|
*/
|
||||||
|
if (munlockall()) {
|
||||||
|
error_report("%s: munlockall: %s", __func__, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to check that the ops we need are supported on anon memory
|
||||||
|
* To do that we need to register a chunk and see the flags that
|
||||||
|
* are returned.
|
||||||
|
*/
|
||||||
|
testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
|
||||||
|
MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (testarea == MAP_FAILED) {
|
||||||
|
error_report("%s: Failed to map test area: %s", __func__,
|
||||||
|
strerror(errno));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
g_assert(((size_t)testarea & (pagesize-1)) == 0);
|
||||||
|
|
||||||
|
reg_struct.range.start = (uintptr_t)testarea;
|
||||||
|
reg_struct.range.len = pagesize;
|
||||||
|
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
|
||||||
|
|
||||||
|
if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) {
|
||||||
|
error_report("%s userfault register: %s", __func__, strerror(errno));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_struct.start = (uintptr_t)testarea;
|
||||||
|
range_struct.len = pagesize;
|
||||||
|
if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
|
||||||
|
error_report("%s userfault unregister: %s", __func__, strerror(errno));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
feature_mask = (__u64)1 << _UFFDIO_WAKE |
|
||||||
|
(__u64)1 << _UFFDIO_COPY |
|
||||||
|
(__u64)1 << _UFFDIO_ZEROPAGE;
|
||||||
|
if ((reg_struct.ioctls & feature_mask) != feature_mask) {
|
||||||
|
error_report("Missing userfault map features: %" PRIx64,
|
||||||
|
(uint64_t)(~reg_struct.ioctls & feature_mask));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Success! */
|
||||||
|
ret = true;
|
||||||
|
out:
|
||||||
|
if (testarea) {
|
||||||
|
munmap(testarea, pagesize);
|
||||||
|
}
|
||||||
|
if (ufd != -1) {
|
||||||
|
close(ufd);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* postcopy_ram_discard_range: Discard a range of memory.
|
||||||
|
* We can assume that if we've been called postcopy_ram_hosttest returned true.
|
||||||
|
*
|
||||||
|
* @mis: Current incoming migration state.
|
||||||
|
* @start, @length: range of memory to discard.
|
||||||
|
*
|
||||||
|
* returns: 0 on success.
|
||||||
|
*/
|
||||||
|
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
trace_postcopy_ram_discard_range(start, length);
|
||||||
|
if (madvise(start, length, MADV_DONTNEED)) {
|
||||||
|
error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setup an area of RAM so that it *can* be used for postcopy later; this
|
||||||
|
* must be done right at the start prior to pre-copy.
|
||||||
|
* opaque should be the MIS.
|
||||||
|
*/
|
||||||
|
static int init_range(const char *block_name, void *host_addr,
|
||||||
|
ram_addr_t offset, ram_addr_t length, void *opaque)
|
||||||
|
{
|
||||||
|
MigrationIncomingState *mis = opaque;
|
||||||
|
|
||||||
|
trace_postcopy_init_range(block_name, host_addr, offset, length);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need the whole of RAM to be truly empty for postcopy, so things
|
||||||
|
* like ROMs and any data tables built during init must be zero'd
|
||||||
|
* - we're going to get the copy from the source anyway.
|
||||||
|
* (Precopy will just overwrite this data, so doesn't need the discard)
|
||||||
|
*/
|
||||||
|
if (postcopy_ram_discard_range(mis, host_addr, length)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At the end of migration, undo the effects of init_range
|
||||||
|
* opaque should be the MIS.
|
||||||
|
*/
|
||||||
|
static int cleanup_range(const char *block_name, void *host_addr,
|
||||||
|
ram_addr_t offset, ram_addr_t length, void *opaque)
|
||||||
|
{
|
||||||
|
MigrationIncomingState *mis = opaque;
|
||||||
|
struct uffdio_range range_struct;
|
||||||
|
trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We turned off hugepage for the precopy stage with postcopy enabled
|
||||||
|
* we can turn it back on now.
|
||||||
|
*/
|
||||||
|
if (qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE)) {
|
||||||
|
error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can also turn off userfault now since we should have all the
|
||||||
|
* pages. It can be useful to leave it on to debug postcopy
|
||||||
|
* if you're not sure it's always getting every page.
|
||||||
|
*/
|
||||||
|
range_struct.start = (uintptr_t)host_addr;
|
||||||
|
range_struct.len = length;
|
||||||
|
|
||||||
|
if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
|
||||||
|
error_report("%s: userfault unregister %s", __func__, strerror(errno));
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialise postcopy-ram, setting the RAM to a state where we can go into
|
||||||
|
* postcopy later; must be called prior to any precopy.
|
||||||
|
* called from arch_init's similarly named ram_postcopy_incoming_init
|
||||||
|
*/
|
||||||
|
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
|
||||||
|
{
|
||||||
|
if (qemu_ram_foreach_block(init_range, mis)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At the end of a migration where postcopy_ram_incoming_init was called.
|
||||||
|
*/
|
||||||
|
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
trace_postcopy_ram_incoming_cleanup_entry();
|
||||||
|
|
||||||
|
if (mis->have_fault_thread) {
|
||||||
|
uint64_t tmp64;
|
||||||
|
|
||||||
|
if (qemu_ram_foreach_block(cleanup_range, mis)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Tell the fault_thread to exit, it's an eventfd that should
|
||||||
|
* currently be at 0, we're going to increment it to 1
|
||||||
|
*/
|
||||||
|
tmp64 = 1;
|
||||||
|
if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
|
||||||
|
trace_postcopy_ram_incoming_cleanup_join();
|
||||||
|
qemu_thread_join(&mis->fault_thread);
|
||||||
|
} else {
|
||||||
|
/* Not much we can do here, but may as well report it */
|
||||||
|
error_report("%s: incrementing userfault_quit_fd: %s", __func__,
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
trace_postcopy_ram_incoming_cleanup_closeuf();
|
||||||
|
close(mis->userfault_fd);
|
||||||
|
close(mis->userfault_quit_fd);
|
||||||
|
mis->have_fault_thread = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
qemu_balloon_inhibit(false);
|
||||||
|
|
||||||
|
if (enable_mlock) {
|
||||||
|
if (os_mlock() < 0) {
|
||||||
|
error_report("mlock: %s", strerror(errno));
|
||||||
|
/*
|
||||||
|
* It doesn't feel right to fail at this point, we have a valid
|
||||||
|
* VM state.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
postcopy_state_set(POSTCOPY_INCOMING_END);
|
||||||
|
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
|
||||||
|
|
||||||
|
if (mis->postcopy_tmp_page) {
|
||||||
|
munmap(mis->postcopy_tmp_page, getpagesize());
|
||||||
|
mis->postcopy_tmp_page = NULL;
|
||||||
|
}
|
||||||
|
trace_postcopy_ram_incoming_cleanup_exit();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable huge pages on an area
|
||||||
|
*/
|
||||||
|
static int nhp_range(const char *block_name, void *host_addr,
|
||||||
|
ram_addr_t offset, ram_addr_t length, void *opaque)
|
||||||
|
{
|
||||||
|
trace_postcopy_nhp_range(block_name, host_addr, offset, length);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Before we do discards we need to ensure those discards really
|
||||||
|
* do delete areas of the page, even if THP thinks a hugepage would
|
||||||
|
* be a good idea, so force hugepages off.
|
||||||
|
*/
|
||||||
|
if (qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE)) {
|
||||||
|
error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
|
||||||
|
* however leaving it until after precopy means that most of the precopy
|
||||||
|
* data is still THPd
|
||||||
|
*/
|
||||||
|
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
if (qemu_ram_foreach_block(nhp_range, mis)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark the given area of RAM as requiring notification to unwritten areas
|
||||||
|
* Used as a callback on qemu_ram_foreach_block.
|
||||||
|
* host_addr: Base of area to mark
|
||||||
|
* offset: Offset in the whole ram arena
|
||||||
|
* length: Length of the section
|
||||||
|
* opaque: MigrationIncomingState pointer
|
||||||
|
* Returns 0 on success
|
||||||
|
*/
|
||||||
|
static int ram_block_enable_notify(const char *block_name, void *host_addr,
|
||||||
|
ram_addr_t offset, ram_addr_t length,
|
||||||
|
void *opaque)
|
||||||
|
{
|
||||||
|
MigrationIncomingState *mis = opaque;
|
||||||
|
struct uffdio_register reg_struct;
|
||||||
|
|
||||||
|
reg_struct.range.start = (uintptr_t)host_addr;
|
||||||
|
reg_struct.range.len = length;
|
||||||
|
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
|
||||||
|
|
||||||
|
/* Now tell our userfault_fd that it's responsible for this area */
|
||||||
|
if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, ®_struct)) {
|
||||||
|
error_report("%s userfault register: %s", __func__, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle faults detected by the USERFAULT markings
|
||||||
|
*/
|
||||||
|
static void *postcopy_ram_fault_thread(void *opaque)
|
||||||
|
{
|
||||||
|
MigrationIncomingState *mis = opaque;
|
||||||
|
struct uffd_msg msg;
|
||||||
|
int ret;
|
||||||
|
size_t hostpagesize = getpagesize();
|
||||||
|
RAMBlock *rb = NULL;
|
||||||
|
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
|
||||||
|
|
||||||
|
trace_postcopy_ram_fault_thread_entry();
|
||||||
|
qemu_sem_post(&mis->fault_thread_sem);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
ram_addr_t rb_offset;
|
||||||
|
ram_addr_t in_raspace;
|
||||||
|
struct pollfd pfd[2];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're mainly waiting for the kernel to give us a faulting HVA,
|
||||||
|
* however we can be told to quit via userfault_quit_fd which is
|
||||||
|
* an eventfd
|
||||||
|
*/
|
||||||
|
pfd[0].fd = mis->userfault_fd;
|
||||||
|
pfd[0].events = POLLIN;
|
||||||
|
pfd[0].revents = 0;
|
||||||
|
pfd[1].fd = mis->userfault_quit_fd;
|
||||||
|
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
|
||||||
|
pfd[1].revents = 0;
|
||||||
|
|
||||||
|
if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
|
||||||
|
error_report("%s: userfault poll: %s", __func__, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pfd[1].revents) {
|
||||||
|
trace_postcopy_ram_fault_thread_quit();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = read(mis->userfault_fd, &msg, sizeof(msg));
|
||||||
|
if (ret != sizeof(msg)) {
|
||||||
|
if (errno == EAGAIN) {
|
||||||
|
/*
|
||||||
|
* if a wake up happens on the other thread just after
|
||||||
|
* the poll, there is nothing to read.
|
||||||
|
*/
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (ret < 0) {
|
||||||
|
error_report("%s: Failed to read full userfault message: %s",
|
||||||
|
__func__, strerror(errno));
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
error_report("%s: Read %d bytes from userfaultfd expected %zd",
|
||||||
|
__func__, ret, sizeof(msg));
|
||||||
|
break; /* Lost alignment, don't know what we'd read next */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (msg.event != UFFD_EVENT_PAGEFAULT) {
|
||||||
|
error_report("%s: Read unexpected event %ud from userfaultfd",
|
||||||
|
__func__, msg.event);
|
||||||
|
continue; /* It's not a page fault, shouldn't happen */
|
||||||
|
}
|
||||||
|
|
||||||
|
rb = qemu_ram_block_from_host(
|
||||||
|
(void *)(uintptr_t)msg.arg.pagefault.address,
|
||||||
|
true, &in_raspace, &rb_offset);
|
||||||
|
if (!rb) {
|
||||||
|
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
|
||||||
|
PRIx64, (uint64_t)msg.arg.pagefault.address);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
rb_offset &= ~(hostpagesize - 1);
|
||||||
|
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
|
||||||
|
qemu_ram_get_idstr(rb),
|
||||||
|
rb_offset);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Send the request to the source - we want to request one
|
||||||
|
* of our host page sizes (which is >= TPS)
|
||||||
|
*/
|
||||||
|
if (rb != last_rb) {
|
||||||
|
last_rb = rb;
|
||||||
|
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
|
||||||
|
rb_offset, hostpagesize);
|
||||||
|
} else {
|
||||||
|
/* Save some space */
|
||||||
|
migrate_send_rp_req_pages(mis, NULL,
|
||||||
|
rb_offset, hostpagesize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
trace_postcopy_ram_fault_thread_exit();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Switch the incoming side into userfault mode.
 *
 * Opens a non-blocking userfaultfd, repeats the API handshake on it,
 * creates the eventfd used to stop the fault thread, starts the fault
 * thread (waiting until it has signalled fault_thread_sem), registers every
 * RAMBlock for missing-page notification and finally inhibits ballooning.
 *
 * Returns 0 on success, -1 on failure (an error has been reported).  If a
 * failure happens before the fault thread is created, any fd opened here is
 * closed again.
 */
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
    /* Open the fd for the kernel to give us userfaults */
    mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (mis->userfault_fd == -1) {
        error_report("%s: Failed to open userfault fd: %s", __func__,
                     strerror(errno));
        return -1;
    }

    /*
     * Although the host check already tested the API, we need to
     * do the check again as an ABI handshake on the new fd.
     */
    if (!ufd_version_check(mis->userfault_fd)) {
        /* Don't leak the fd we just opened */
        close(mis->userfault_fd);
        return -1;
    }

    /* Now an eventfd we use to tell the fault-thread to quit */
    mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
    if (mis->userfault_quit_fd == -1) {
        error_report("%s: Opening userfault_quit_fd: %s", __func__,
                     strerror(errno));
        close(mis->userfault_fd);
        return -1;
    }

    /* The semaphore is only needed until the thread reports it has started */
    qemu_sem_init(&mis->fault_thread_sem, 0);
    qemu_thread_create(&mis->fault_thread, "postcopy/fault",
                       postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
    qemu_sem_wait(&mis->fault_thread_sem);
    qemu_sem_destroy(&mis->fault_thread_sem);
    mis->have_fault_thread = true;

    /* Mark so that we get notified of accesses to unwritten areas */
    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
        return -1;
    }

    /*
     * Ballooning can mark pages as absent while we're postcopying
     * that would cause false userfaults.
     */
    qemu_balloon_inhibit(true);

    trace_postcopy_ram_enable_notify();

    return 0;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Place a host page (from) at (host) atomically
|
||||||
|
* returns 0 on success
|
||||||
|
*/
|
||||||
|
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
|
||||||
|
{
|
||||||
|
struct uffdio_copy copy_struct;
|
||||||
|
|
||||||
|
copy_struct.dst = (uint64_t)(uintptr_t)host;
|
||||||
|
copy_struct.src = (uint64_t)(uintptr_t)from;
|
||||||
|
copy_struct.len = getpagesize();
|
||||||
|
copy_struct.mode = 0;
|
||||||
|
|
||||||
|
/* copy also acks to the kernel waking the stalled thread up
|
||||||
|
* TODO: We can inhibit that ack and only do it if it was requested
|
||||||
|
* which would be slightly cheaper, but we'd have to be careful
|
||||||
|
* of the order of updating our page state.
|
||||||
|
*/
|
||||||
|
if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) {
|
||||||
|
int e = errno;
|
||||||
|
error_report("%s: %s copy host: %p from: %p",
|
||||||
|
__func__, strerror(e), host, from);
|
||||||
|
|
||||||
|
return -e;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_postcopy_place_page(host);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Place a zero page at (host) atomically
|
||||||
|
* returns 0 on success
|
||||||
|
*/
|
||||||
|
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
|
||||||
|
{
|
||||||
|
struct uffdio_zeropage zero_struct;
|
||||||
|
|
||||||
|
zero_struct.range.start = (uint64_t)(uintptr_t)host;
|
||||||
|
zero_struct.range.len = getpagesize();
|
||||||
|
zero_struct.mode = 0;
|
||||||
|
|
||||||
|
if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
|
||||||
|
int e = errno;
|
||||||
|
error_report("%s: %s zero host: %p",
|
||||||
|
__func__, strerror(e), host);
|
||||||
|
|
||||||
|
return -e;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_postcopy_place_page_zero(host);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a target page of memory that can be mapped at a later point in time
|
||||||
|
* using postcopy_place_page
|
||||||
|
* The same address is used repeatedly, postcopy_place_page just takes the
|
||||||
|
* backing page away.
|
||||||
|
* Returns: Pointer to allocated page
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
if (!mis->postcopy_tmp_page) {
|
||||||
|
mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
|
||||||
|
PROT_READ | PROT_WRITE, MAP_PRIVATE |
|
||||||
|
MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (!mis->postcopy_tmp_page) {
|
||||||
|
error_report("%s: %s", __func__, strerror(errno));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mis->postcopy_tmp_page;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
/* No target OS support, stubs just fail */
|
||||||
|
bool postcopy_ram_supported_by_host(void)
|
||||||
|
{
|
||||||
|
error_report("%s: No OS support", __func__);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
|
||||||
|
{
|
||||||
|
error_report("postcopy_ram_incoming_init: No OS support");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *postcopy_get_tmp_page(MigrationIncomingState *mis)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* postcopy_discard_send_init: Called at the start of each RAMBlock before
|
||||||
|
* asking to discard individual ranges.
|
||||||
|
*
|
||||||
|
* @ms: The current migration state.
|
||||||
|
* @offset: the bitmap offset of the named RAMBlock in the migration
|
||||||
|
* bitmap.
|
||||||
|
* @name: RAMBlock that discards will operate on.
|
||||||
|
*
|
||||||
|
* returns: a new PDS.
|
||||||
|
*/
|
||||||
|
PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
|
||||||
|
unsigned long offset,
|
||||||
|
const char *name)
|
||||||
|
{
|
||||||
|
PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
|
||||||
|
|
||||||
|
if (res) {
|
||||||
|
res->ramblock_name = name;
|
||||||
|
res->offset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* postcopy_discard_send_range: Called by the bitmap code for each chunk to
|
||||||
|
* discard. May send a discard message, may just leave it queued to
|
||||||
|
* be sent later.
|
||||||
|
*
|
||||||
|
* @ms: Current migration state.
|
||||||
|
* @pds: Structure initialised by postcopy_discard_send_init().
|
||||||
|
* @start,@length: a range of pages in the migration bitmap in the
|
||||||
|
* RAM block passed to postcopy_discard_send_init() (length=1 is one page)
|
||||||
|
*/
|
||||||
|
void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
|
||||||
|
unsigned long start, unsigned long length)
|
||||||
|
{
|
||||||
|
size_t tp_bits = qemu_target_page_bits();
|
||||||
|
/* Convert to byte offsets within the RAM block */
|
||||||
|
pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
|
||||||
|
pds->length_list[pds->cur_entry] = length << tp_bits;
|
||||||
|
trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
|
||||||
|
pds->cur_entry++;
|
||||||
|
pds->nsentwords++;
|
||||||
|
|
||||||
|
if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
|
||||||
|
/* Full set, ship it! */
|
||||||
|
qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
|
||||||
|
pds->cur_entry,
|
||||||
|
pds->start_list,
|
||||||
|
pds->length_list);
|
||||||
|
pds->nsentcmds++;
|
||||||
|
pds->cur_entry = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* postcopy_discard_send_finish: Called at the end of each RAMBlock by the
|
||||||
|
* bitmap code. Sends any outstanding discard messages, frees the PDS
|
||||||
|
*
|
||||||
|
* @ms: Current migration state.
|
||||||
|
* @pds: Structure initialised by postcopy_discard_send_init().
|
||||||
|
*/
|
||||||
|
void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
|
||||||
|
{
|
||||||
|
/* Anything unsent? */
|
||||||
|
if (pds->cur_entry) {
|
||||||
|
qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
|
||||||
|
pds->cur_entry,
|
||||||
|
pds->start_list,
|
||||||
|
pds->length_list);
|
||||||
|
pds->nsentcmds++;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
|
||||||
|
pds->nsentcmds);
|
||||||
|
|
||||||
|
g_free(pds);
|
||||||
|
}
|
|
@ -22,6 +22,7 @@
|
||||||
* THE SOFTWARE.
|
* THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
#include "qemu-common.h"
|
#include "qemu-common.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
#include "qemu/iov.h"
|
#include "qemu/iov.h"
|
||||||
#include "qemu/sockets.h"
|
#include "qemu/sockets.h"
|
||||||
#include "qemu/coroutine.h"
|
#include "qemu/coroutine.h"
|
||||||
|
@ -39,12 +40,43 @@ static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
|
||||||
QEMUFileSocket *s = opaque;
|
QEMUFileSocket *s = opaque;
|
||||||
ssize_t len;
|
ssize_t len;
|
||||||
ssize_t size = iov_size(iov, iovcnt);
|
ssize_t size = iov_size(iov, iovcnt);
|
||||||
|
ssize_t offset = 0;
|
||||||
|
int err;
|
||||||
|
|
||||||
len = iov_send(s->fd, iov, iovcnt, 0, size);
|
while (size > 0) {
|
||||||
if (len < size) {
|
len = iov_send(s->fd, iov, iovcnt, offset, size);
|
||||||
len = -socket_error();
|
|
||||||
|
if (len > 0) {
|
||||||
|
size -= len;
|
||||||
|
offset += len;
|
||||||
}
|
}
|
||||||
return len;
|
|
||||||
|
if (size > 0) {
|
||||||
|
err = socket_error();
|
||||||
|
|
||||||
|
if (err != EAGAIN && err != EWOULDBLOCK) {
|
||||||
|
error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)",
|
||||||
|
err, (size_t)size, (size_t)len);
|
||||||
|
/*
|
||||||
|
* If I've already sent some but only just got the error, I
|
||||||
|
* could return the amount validly sent so far and wait for the
|
||||||
|
* next call to report the error, but I'd rather flag the error
|
||||||
|
* immediately.
|
||||||
|
*/
|
||||||
|
return -err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emulate blocking */
|
||||||
|
GPollFD pfd;
|
||||||
|
|
||||||
|
pfd.fd = s->fd;
|
||||||
|
pfd.events = G_IO_OUT | G_IO_ERR;
|
||||||
|
pfd.revents = 0;
|
||||||
|
g_poll(&pfd, 1 /* 1 fd */, -1 /* no timeout */);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int socket_get_fd(void *opaque)
|
static int socket_get_fd(void *opaque)
|
||||||
|
@ -97,6 +129,56 @@ static int socket_shutdown(void *opaque, bool rd, bool wr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Close handler for a return-path QEMUFile: releases the QEMUFileSocket
 * passed as @opaque and returns 0.
 *
 * Note: We don't close the socket, that should be done by the forward
 * path.
 */
static int socket_return_close(void *opaque)
{
    g_free(opaque);
    return 0;
}
|
||||||
|
|
||||||
|
static const QEMUFileOps socket_return_read_ops = {
|
||||||
|
.get_fd = socket_get_fd,
|
||||||
|
.get_buffer = socket_get_buffer,
|
||||||
|
.close = socket_return_close,
|
||||||
|
.shut_down = socket_shutdown,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const QEMUFileOps socket_return_write_ops = {
|
||||||
|
.get_fd = socket_get_fd,
|
||||||
|
.writev_buffer = socket_writev_buffer,
|
||||||
|
.close = socket_return_close,
|
||||||
|
.shut_down = socket_shutdown,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Give a QEMUFile* off the same socket but data in the opposite
|
||||||
|
* direction.
|
||||||
|
*/
|
||||||
|
static QEMUFile *socket_get_return_path(void *opaque)
|
||||||
|
{
|
||||||
|
QEMUFileSocket *forward = opaque;
|
||||||
|
QEMUFileSocket *reverse;
|
||||||
|
|
||||||
|
if (qemu_file_get_error(forward->file)) {
|
||||||
|
/* If the forward file is in error, don't try and open a return */
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
reverse = g_malloc0(sizeof(QEMUFileSocket));
|
||||||
|
reverse->fd = forward->fd;
|
||||||
|
/* I don't think there's a better way to tell which direction 'this' is */
|
||||||
|
if (forward->file->ops->get_buffer != NULL) {
|
||||||
|
/* being called from the read side, so we need to be able to write */
|
||||||
|
return qemu_fopen_ops(reverse, &socket_return_write_ops);
|
||||||
|
} else {
|
||||||
|
return qemu_fopen_ops(reverse, &socket_return_read_ops);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
|
static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
|
||||||
int64_t pos)
|
int64_t pos)
|
||||||
{
|
{
|
||||||
|
@ -209,15 +291,16 @@ static const QEMUFileOps socket_read_ops = {
|
||||||
.get_fd = socket_get_fd,
|
.get_fd = socket_get_fd,
|
||||||
.get_buffer = socket_get_buffer,
|
.get_buffer = socket_get_buffer,
|
||||||
.close = socket_close,
|
.close = socket_close,
|
||||||
.shut_down = socket_shutdown
|
.shut_down = socket_shutdown,
|
||||||
|
.get_return_path = socket_get_return_path
|
||||||
};
|
};
|
||||||
|
|
||||||
static const QEMUFileOps socket_write_ops = {
|
static const QEMUFileOps socket_write_ops = {
|
||||||
.get_fd = socket_get_fd,
|
.get_fd = socket_get_fd,
|
||||||
.writev_buffer = socket_writev_buffer,
|
.writev_buffer = socket_writev_buffer,
|
||||||
.close = socket_close,
|
.close = socket_close,
|
||||||
.shut_down = socket_shutdown
|
.shut_down = socket_shutdown,
|
||||||
|
.get_return_path = socket_get_return_path
|
||||||
};
|
};
|
||||||
|
|
||||||
QEMUFile *qemu_fopen_socket(int fd, const char *mode)
|
QEMUFile *qemu_fopen_socket(int fd, const char *mode)
|
||||||
|
|
|
@ -44,6 +44,18 @@ int qemu_file_shutdown(QEMUFile *f)
|
||||||
return f->ops->shut_down(f->opaque, true, true);
|
return f->ops->shut_down(f->opaque, true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Result: QEMUFile* for a 'return path' for comms in the opposite direction
|
||||||
|
* NULL if not available
|
||||||
|
*/
|
||||||
|
QEMUFile *qemu_file_get_return_path(QEMUFile *f)
|
||||||
|
{
|
||||||
|
if (!f->ops->get_return_path) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return f->ops->get_return_path(f->opaque);
|
||||||
|
}
|
||||||
|
|
||||||
bool qemu_file_mode_is_not_valid(const char *mode)
|
bool qemu_file_mode_is_not_valid(const char *mode)
|
||||||
{
|
{
|
||||||
if (mode == NULL ||
|
if (mode == NULL ||
|
||||||
|
@ -433,6 +445,43 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
|
||||||
return done;
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read 'size' bytes of data from the file.
|
||||||
|
* 'size' can be larger than the internal buffer.
|
||||||
|
*
|
||||||
|
* The data:
|
||||||
|
* may be held on an internal buffer (in which case *buf is updated
|
||||||
|
* to point to it) that is valid until the next qemu_file operation.
|
||||||
|
* OR
|
||||||
|
* will be copied to the *buf that was passed in.
|
||||||
|
*
|
||||||
|
* The code tries to avoid the copy if possible.
|
||||||
|
*
|
||||||
|
* It will return size bytes unless there was an error, in which case it will
|
||||||
|
* return as many as it managed to read (assuming blocking fd's which
|
||||||
|
* all current QEMUFile are)
|
||||||
|
*
|
||||||
|
* Note: Since **buf may get changed, the caller should take care to
|
||||||
|
* keep a pointer to the original buffer if it needs to deallocate it.
|
||||||
|
*/
|
||||||
|
size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
|
||||||
|
{
|
||||||
|
if (size < IO_BUF_SIZE) {
|
||||||
|
size_t res;
|
||||||
|
uint8_t *src;
|
||||||
|
|
||||||
|
res = qemu_peek_buffer(f, &src, size, 0);
|
||||||
|
|
||||||
|
if (res == size) {
|
||||||
|
qemu_file_skip(f, res);
|
||||||
|
*buf = src;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return qemu_get_buffer(f, *buf, size);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Peeks a single byte from the buffer; this isn't guaranteed to work if
|
* Peeks a single byte from the buffer; this isn't guaranteed to work if
|
||||||
* offset leaves a gap after the previous read/peeked data.
|
* offset leaves a gap after the previous read/peeked data.
|
||||||
|
@ -611,3 +660,18 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
|
||||||
|
|
||||||
return res == len ? res : 0;
|
return res == len ? res : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the blocking state of the QEMUFile.
|
||||||
|
* Note: On some transports the OS only keeps a single blocking state for
|
||||||
|
* both directions, and thus changing the blocking on the main
|
||||||
|
* QEMUFile can also affect the return path.
|
||||||
|
*/
|
||||||
|
void qemu_file_set_blocking(QEMUFile *f, bool block)
|
||||||
|
{
|
||||||
|
if (block) {
|
||||||
|
qemu_set_block(qemu_get_fd(f));
|
||||||
|
} else {
|
||||||
|
qemu_set_nonblock(qemu_get_fd(f));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
983
migration/ram.c
983
migration/ram.c
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -430,6 +430,8 @@
|
||||||
#
|
#
|
||||||
# @active: in the process of doing migration.
|
# @active: in the process of doing migration.
|
||||||
#
|
#
|
||||||
|
# @postcopy-active: like active, but now in postcopy mode. (since 2.5)
|
||||||
|
#
|
||||||
# @completed: migration is finished.
|
# @completed: migration is finished.
|
||||||
#
|
#
|
||||||
# @failed: some error occurred during migration process.
|
# @failed: some error occurred during migration process.
|
||||||
|
@ -439,7 +441,7 @@
|
||||||
##
|
##
|
||||||
{ 'enum': 'MigrationStatus',
|
{ 'enum': 'MigrationStatus',
|
||||||
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
|
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
|
||||||
'active', 'completed', 'failed' ] }
|
'active', 'postcopy-active', 'completed', 'failed' ] }
|
||||||
|
|
||||||
##
|
##
|
||||||
# @MigrationInfo
|
# @MigrationInfo
|
||||||
|
@ -540,11 +542,15 @@
|
||||||
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
|
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
|
||||||
# to speed up convergence of RAM migration. (since 1.6)
|
# to speed up convergence of RAM migration. (since 1.6)
|
||||||
#
|
#
|
||||||
|
# @x-postcopy-ram: Start executing on the migration target before all of RAM has
|
||||||
|
# been migrated, pulling the remaining pages along as needed. NOTE: If
|
||||||
|
# the migration fails during postcopy the VM will fail. (since 2.5)
|
||||||
|
#
|
||||||
# Since: 1.2
|
# Since: 1.2
|
||||||
##
|
##
|
||||||
{ 'enum': 'MigrationCapability',
|
{ 'enum': 'MigrationCapability',
|
||||||
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
|
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
|
||||||
'compress', 'events'] }
|
'compress', 'events', 'x-postcopy-ram'] }
|
||||||
|
|
||||||
##
|
##
|
||||||
# @MigrationCapabilityStatus
|
# @MigrationCapabilityStatus
|
||||||
|
@ -697,6 +703,14 @@
|
||||||
'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
|
'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
|
||||||
'*tls-port': 'int', '*cert-subject': 'str' } }
|
'*tls-port': 'int', '*cert-subject': 'str' } }
|
||||||
|
|
||||||
|
##
|
||||||
|
# @migrate-start-postcopy
|
||||||
|
#
|
||||||
|
# Switch migration to postcopy mode
|
||||||
|
#
|
||||||
|
# Since: 2.5
##
|
||||||
|
{ 'command': 'migrate-start-postcopy' }
|
||||||
|
|
||||||
##
|
##
|
||||||
# @MouseInfo:
|
# @MouseInfo:
|
||||||
#
|
#
|
||||||
|
|
|
@ -717,6 +717,25 @@ Example:
|
||||||
<- { "return": {} }
|
<- { "return": {} }
|
||||||
|
|
||||||
EQMP
|
EQMP
|
||||||
|
{
|
||||||
|
.name = "migrate-start-postcopy",
|
||||||
|
.args_type = "",
|
||||||
|
.mhandler.cmd_new = qmp_marshal_migrate_start_postcopy,
|
||||||
|
},
|
||||||
|
|
||||||
|
SQMP
|
||||||
|
migrate-start-postcopy
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Switch an in-progress migration to postcopy mode. Ignored after the end of
|
||||||
|
migration (or once already in postcopy).
|
||||||
|
|
||||||
|
Example:
|
||||||
|
-> { "execute": "migrate-start-postcopy" }
|
||||||
|
<- { "return": {} }
|
||||||
|
|
||||||
|
EQMP
|
||||||
|
|
||||||
{
|
{
|
||||||
.name = "query-migrate-cache-size",
|
.name = "query-migrate-cache-size",
|
||||||
.args_type = "",
|
.args_type = "",
|
||||||
|
|
1
qtest.c
1
qtest.c
|
@ -657,7 +657,6 @@ void qtest_init(const char *qtest_chrdev, const char *qtest_log, Error **errp)
|
||||||
|
|
||||||
inbuf = g_string_new("");
|
inbuf = g_string_new("");
|
||||||
qtest_chr = chr;
|
qtest_chr = chr;
|
||||||
page_size_init();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool qtest_driver(void)
|
bool qtest_driver(void)
|
||||||
|
|
84
trace-events
84
trace-events
|
@ -1202,16 +1202,43 @@ virtio_gpu_fence_resp(uint64_t fence) "fence 0x%" PRIx64
|
||||||
|
|
||||||
# migration/savevm.c
|
# migration/savevm.c
|
||||||
qemu_loadvm_state_section(unsigned int section_type) "%d"
|
qemu_loadvm_state_section(unsigned int section_type) "%d"
|
||||||
|
qemu_loadvm_state_section_command(int ret) "%d"
|
||||||
qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
|
qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
|
||||||
|
qemu_loadvm_state_main(void) ""
|
||||||
|
qemu_loadvm_state_main_quit_parent(void) ""
|
||||||
|
qemu_loadvm_state_post_main(int ret) "%d"
|
||||||
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
|
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
|
||||||
|
qemu_savevm_send_packaged(void) ""
|
||||||
|
loadvm_handle_cmd_packaged(unsigned int length) "%u"
|
||||||
|
loadvm_handle_cmd_packaged_main(int ret) "%d"
|
||||||
|
loadvm_handle_cmd_packaged_received(int ret) "%d"
|
||||||
|
loadvm_postcopy_handle_advise(void) ""
|
||||||
|
loadvm_postcopy_handle_listen(void) ""
|
||||||
|
loadvm_postcopy_handle_run(void) ""
|
||||||
|
loadvm_postcopy_handle_run_cpu_sync(void) ""
|
||||||
|
loadvm_postcopy_handle_run_vmstart(void) ""
|
||||||
|
loadvm_postcopy_ram_handle_discard(void) ""
|
||||||
|
loadvm_postcopy_ram_handle_discard_end(void) ""
|
||||||
|
loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud"
|
||||||
|
loadvm_process_command(uint16_t com, uint16_t len) "com=0x%x len=%d"
|
||||||
|
loadvm_process_command_ping(uint32_t val) "%x"
|
||||||
|
postcopy_ram_listen_thread_exit(void) ""
|
||||||
|
postcopy_ram_listen_thread_start(void) ""
|
||||||
|
qemu_savevm_send_postcopy_advise(void) ""
|
||||||
|
qemu_savevm_send_postcopy_ram_discard(const char *id, uint16_t len) "%s: %ud"
|
||||||
|
savevm_command_send(uint16_t command, uint16_t len) "com=0x%x len=%d"
|
||||||
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
|
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
|
||||||
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
|
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
|
||||||
savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u"
|
savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u"
|
||||||
|
savevm_send_open_return_path(void) ""
|
||||||
|
savevm_send_ping(uint32_t val) "%x"
|
||||||
|
savevm_send_postcopy_listen(void) ""
|
||||||
|
savevm_send_postcopy_run(void) ""
|
||||||
savevm_state_begin(void) ""
|
savevm_state_begin(void) ""
|
||||||
savevm_state_header(void) ""
|
savevm_state_header(void) ""
|
||||||
savevm_state_iterate(void) ""
|
savevm_state_iterate(void) ""
|
||||||
savevm_state_complete(void) ""
|
|
||||||
savevm_state_cleanup(void) ""
|
savevm_state_cleanup(void) ""
|
||||||
|
savevm_state_complete_precopy(void) ""
|
||||||
vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
|
vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
|
||||||
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
|
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
|
||||||
qemu_announce_self_iter(const char *mac) "%s"
|
qemu_announce_self_iter(const char *mac) "%s"
|
||||||
|
@ -1229,9 +1256,14 @@ vmstate_subsection_load_good(const char *parent) "%s"
|
||||||
qemu_file_fclose(void) ""
|
qemu_file_fclose(void) ""
|
||||||
|
|
||||||
# migration/ram.c
|
# migration/ram.c
|
||||||
|
get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64
|
||||||
|
get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)"
|
||||||
migration_bitmap_sync_start(void) ""
|
migration_bitmap_sync_start(void) ""
|
||||||
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
|
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
|
||||||
migration_throttle(void) ""
|
migration_throttle(void) ""
|
||||||
|
ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
|
||||||
|
ram_postcopy_send_discard_bitmap(void) ""
|
||||||
|
ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
|
||||||
|
|
||||||
# hw/display/qxl.c
|
# hw/display/qxl.c
|
||||||
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
|
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
|
||||||
|
@ -1421,17 +1453,40 @@ flic_no_device_api(int err) "flic: no Device Contral API support %d"
|
||||||
flic_reset_failed(int err) "flic: reset failed %d"
|
flic_reset_failed(int err) "flic: reset failed %d"
|
||||||
|
|
||||||
# migration.c
|
# migration.c
|
||||||
|
await_return_path_close_on_source_close(void) ""
|
||||||
|
await_return_path_close_on_source_joining(void) ""
|
||||||
migrate_set_state(int new_state) "new state %d"
|
migrate_set_state(int new_state) "new state %d"
|
||||||
migrate_fd_cleanup(void) ""
|
migrate_fd_cleanup(void) ""
|
||||||
migrate_fd_error(void) ""
|
migrate_fd_error(void) ""
|
||||||
migrate_fd_cancel(void) ""
|
migrate_fd_cancel(void) ""
|
||||||
migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64
|
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at %zx len %zx"
|
||||||
migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
|
migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
|
||||||
migrate_state_too_big(void) ""
|
migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
|
||||||
|
migration_completion_file_err(void) ""
|
||||||
|
migration_completion_postcopy_end(void) ""
|
||||||
|
migration_completion_postcopy_end_after_complete(void) ""
|
||||||
|
migration_completion_postcopy_end_before_rp(void) ""
|
||||||
|
migration_completion_postcopy_end_after_rp(int rp_error) "%d"
|
||||||
|
migration_thread_after_loop(void) ""
|
||||||
|
migration_thread_file_err(void) ""
|
||||||
|
migration_thread_setup_complete(void) ""
|
||||||
|
open_return_path_on_source(void) ""
|
||||||
|
open_return_path_on_source_continue(void) ""
|
||||||
|
postcopy_start(void) ""
|
||||||
|
postcopy_start_set_run(void) ""
|
||||||
|
source_return_path_thread_bad_end(void) ""
|
||||||
|
source_return_path_thread_end(void) ""
|
||||||
|
source_return_path_thread_entry(void) ""
|
||||||
|
source_return_path_thread_loop_top(void) ""
|
||||||
|
source_return_path_thread_pong(uint32_t val) "%x"
|
||||||
|
source_return_path_thread_shut(uint32_t val) "%x"
|
||||||
migrate_global_state_post_load(const char *state) "loaded state: %s"
|
migrate_global_state_post_load(const char *state) "loaded state: %s"
|
||||||
migrate_global_state_pre_save(const char *state) "saved state: %s"
|
migrate_global_state_pre_save(const char *state) "saved state: %s"
|
||||||
migration_completion_file_err(void) ""
|
|
||||||
migration_thread_low_pending(uint64_t pending) "%" PRIu64
|
migration_thread_low_pending(uint64_t pending) "%" PRIu64
|
||||||
|
migrate_state_too_big(void) ""
|
||||||
|
migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
|
||||||
|
process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
|
||||||
|
process_incoming_migration_co_postcopy_end_main(void) ""
|
||||||
|
|
||||||
# migration/rdma.c
|
# migration/rdma.c
|
||||||
qemu_rdma_accept_incoming_migration(void) ""
|
qemu_rdma_accept_incoming_migration(void) ""
|
||||||
|
@ -1497,6 +1552,25 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
|
||||||
rdma_start_outgoing_migration_after_rdma_connect(void) ""
|
rdma_start_outgoing_migration_after_rdma_connect(void) ""
|
||||||
rdma_start_outgoing_migration_after_rdma_source_init(void) ""
|
rdma_start_outgoing_migration_after_rdma_source_init(void) ""
|
||||||
|
|
||||||
|
# migration/postcopy-ram.c
|
||||||
|
postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
|
||||||
|
postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
|
||||||
|
postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
|
||||||
|
postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
|
||||||
|
postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
|
||||||
|
postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
|
||||||
|
postcopy_place_page(void *host_addr) "host=%p"
|
||||||
|
postcopy_place_page_zero(void *host_addr) "host=%p"
|
||||||
|
postcopy_ram_enable_notify(void) ""
|
||||||
|
postcopy_ram_fault_thread_entry(void) ""
|
||||||
|
postcopy_ram_fault_thread_exit(void) ""
|
||||||
|
postcopy_ram_fault_thread_quit(void) ""
|
||||||
|
postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=%" PRIx64 " rb=%s offset=%zx"
|
||||||
|
postcopy_ram_incoming_cleanup_closeuf(void) ""
|
||||||
|
postcopy_ram_incoming_cleanup_entry(void) ""
|
||||||
|
postcopy_ram_incoming_cleanup_exit(void) ""
|
||||||
|
postcopy_ram_incoming_cleanup_join(void) ""
|
||||||
|
|
||||||
# kvm-all.c
|
# kvm-all.c
|
||||||
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
|
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
|
||||||
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
|
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
|
||||||
|
|
Loading…
Reference in a new issue