Pull request

v2:
  * Drop merge failure from a previous pull request that broke virtio-blk on ARM
    guests
  * Add Parallels XML patch series
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJaZgqoAAoJEJykq7OBq3PIwNIIAKD8OwjeQdtznD88ikMGd5CF
 PvBHOIXIX7GCaKdAFEP1MMB0xaTN93zhphPZfcQxnnbi2LrnzuP2WCSunKPPcGQJ
 ToMRxYV+OkS0Rm8/us9fQpmBf2PKouIlNXP4jZZuEYAmyZgoU7YBQHYP1sw2K6RE
 MHPBdKMKzb+S8u+HBx+oy1LQ0cKKRjCYXhdQ4p7rlWkXc5irQQh9d12W/EHS++cd
 5wgX5V3aj4rXD4XwXY/kgDXtSG37sgNOHx77W7gDO3KuBolEBVvPvbP7yrCZzfcC
 pGLPKwXnHTxOKlwLQ0weD+uIsJWv6XGKLhgX+MFWCVU1PvPuSTWcqfBplgTUU6s=
 =mjmD
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

Pull request

v2:
 * Drop merge failure from a previous pull request that broke virtio-blk on ARM
   guests
 * Add Parallels XML patch series

# gpg: Signature made Mon 22 Jan 2018 16:00:40 GMT
# gpg:                using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  block/parallels: add backing support to readv/writev
  block/parallels: replace some magic numbers
  block/parallels: move some structures into header
  configure: add dependency
  docs/interop/prl-xml: description of Parallels Disk format
  block: add block_set_io_throttle virtio-blk-pci QMP example

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-01-24 15:28:36 +00:00
commit 25bfd5a75f
7 changed files with 343 additions and 61 deletions

View file

@ -47,3 +47,5 @@ block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
dmg-bz2.o-libs := $(BZIP2_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
parallels.o-cflags := $(LIBXML2_CFLAGS)
parallels.o-libs := $(LIBXML2_LIBS)

View file

@ -36,6 +36,7 @@
#include "qemu/bswap.h"
#include "qemu/bitmap.h"
#include "migration/blocker.h"
#include "parallels.h"
/**************************************************************/
@ -45,30 +46,6 @@
#define HEADER_INUSE_MAGIC (0x746F6E59)
#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
// always little-endian
typedef struct ParallelsHeader {
char magic[16]; // "WithoutFreeSpace"
uint32_t version;
uint32_t heads;
uint32_t cylinders;
uint32_t tracks;
uint32_t bat_entries;
uint64_t nb_sectors;
uint32_t inuse;
uint32_t data_off;
char padding[12];
} QEMU_PACKED ParallelsHeader;
typedef enum ParallelsPreallocMode {
PRL_PREALLOC_MODE_FALLOCATE = 0,
PRL_PREALLOC_MODE_TRUNCATE = 1,
PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;
static QEnumLookup prealloc_mode_lookup = {
.array = (const char *const[]) {
"falloc",
@ -77,34 +54,6 @@ static QEnumLookup prealloc_mode_lookup = {
.size = PRL_PREALLOC_MODE__MAX
};
typedef struct BDRVParallelsState {
/** Locking is conservative, the lock protects
* - image file extending (truncate, fallocate)
* - any access to block allocation table
*/
CoMutex lock;
ParallelsHeader *header;
uint32_t header_size;
bool header_unclean;
unsigned long *bat_dirty_bmap;
unsigned int bat_dirty_block;
uint32_t *bat_bitmap;
unsigned int bat_size;
int64_t data_end;
uint64_t prealloc_size;
ParallelsPreallocMode prealloc_mode;
unsigned int tracks;
unsigned int off_multiplier;
Error *migration_blocker;
} BDRVParallelsState;
#define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode"
#define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size"
@ -193,6 +142,7 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
int ret;
BDRVParallelsState *s = bs->opaque;
int64_t pos, space, idx, to_allocate, i, len;
@ -221,7 +171,6 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
return len;
}
if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
int ret;
space += s->prealloc_size;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
ret = bdrv_pwrite_zeroes(bs->file,
@ -237,6 +186,37 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
}
}
/* Try to read from backing to fill empty clusters
* FIXME: 1. previous write_zeroes may be redundant
* 2. most of data we read from backing will be rewritten by
* parallels_co_writev. On aligned-to-cluster write we do not need
* this read at all.
* 3. it would be good to combine write of data from backing and new
* data into one write call */
if (bs->backing) {
int64_t nb_cow_sectors = to_allocate * s->tracks;
int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
QEMUIOVector qiov;
struct iovec iov = {
.iov_len = nb_cow_bytes,
.iov_base = qemu_blockalign(bs, nb_cow_bytes)
};
qemu_iovec_init_external(&qiov, &iov, 1);
ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors,
&qiov);
if (ret < 0) {
qemu_vfree(iov.iov_base);
return ret;
}
ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov);
qemu_vfree(iov.iov_base);
if (ret < 0) {
return ret;
}
}
for (i = 0; i < to_allocate; i++) {
s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
s->data_end += s->tracks;
@ -360,12 +340,19 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
nbytes = n << BDRV_SECTOR_BITS;
if (position < 0) {
qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
} else {
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
if (position < 0) {
if (bs->backing) {
ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
if (ret < 0) {
break;
}
} else {
qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
}
} else {
ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
if (ret < 0) {
break;
@ -527,8 +514,9 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
header.version = cpu_to_le32(HEADER_VERSION);
/* don't care much about geometry, it is not used on image level */
header.heads = cpu_to_le32(16);
header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
header.heads = cpu_to_le32(HEADS_NUMBER);
header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE
/ HEADS_NUMBER / SEC_IN_CYL);
header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
header.bat_entries = cpu_to_le32(bat_entries);
header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
@ -798,7 +786,7 @@ static BlockDriver bdrv_parallels = {
.bdrv_co_flush_to_os = parallels_co_flush_to_os,
.bdrv_co_readv = parallels_co_readv,
.bdrv_co_writev = parallels_co_writev,
.supports_backing = true,
.bdrv_create = parallels_create,
.bdrv_check = parallels_check,
.create_opts = &parallels_create_opts,

88
block/parallels.h Normal file
View file

@ -0,0 +1,88 @@
/*
* Block driver for Parallels disk image format
*
* Copyright (c) 2015-2017 Virtuozzo, Inc.
* Authors:
* 2016-2017 Klim S. Kireev <klim.kireev@virtuozzo.com>
* 2015 Denis V. Lunev <den@openvz.org>
*
* This code was originally based on comparing different disk images created
* by Parallels. Currently it is based on opened OpenVZ sources
* available at
* https://github.com/OpenVZ/ploop
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_PARALLELS_H
#define BLOCK_PARALLELS_H
#include "qemu/coroutine.h"
#include "qemu/typedefs.h"
#define HEADS_NUMBER 16
#define SEC_IN_CYL 32
#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
/* always little-endian */
typedef struct ParallelsHeader {
char magic[16]; /* "WithoutFreeSpace" */
uint32_t version;
uint32_t heads;
uint32_t cylinders;
uint32_t tracks;
uint32_t bat_entries;
uint64_t nb_sectors;
uint32_t inuse;
uint32_t data_off;
char padding[12];
} QEMU_PACKED ParallelsHeader;
typedef enum ParallelsPreallocMode {
PRL_PREALLOC_MODE_FALLOCATE = 0,
PRL_PREALLOC_MODE_TRUNCATE = 1,
PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;
typedef struct BDRVParallelsState {
/** Locking is conservative, the lock protects
* - image file extending (truncate, fallocate)
* - any access to block allocation table
*/
CoMutex lock;
ParallelsHeader *header;
uint32_t header_size;
bool header_unclean;
unsigned long *bat_dirty_bmap;
unsigned int bat_dirty_block;
uint32_t *bat_bitmap;
unsigned int bat_size;
int64_t data_end;
uint64_t prealloc_size;
ParallelsPreallocMode prealloc_mode;
unsigned int tracks;
unsigned int off_multiplier;
Error *migration_blocker;
} BDRVParallelsState;
#endif

27
configure vendored
View file

@ -435,6 +435,7 @@ tcmalloc="no"
jemalloc="no"
replication="yes"
vxhs=""
libxml2=""
supported_cpu="no"
supported_os="no"
@ -1298,6 +1299,10 @@ for opt do
;;
--enable-numa) numa="yes"
;;
--disable-libxml2) libxml2="no"
;;
--enable-libxml2) libxml2="yes"
;;
--disable-tcmalloc) tcmalloc="no"
;;
--enable-tcmalloc) tcmalloc="yes"
@ -1573,6 +1578,7 @@ disabled with --disable-FEATURE, default is enabled if available:
tpm TPM support
libssh2 ssh block device support
numa libnuma support
libxml2 for Parallels image format
tcmalloc tcmalloc support
jemalloc jemalloc support
replication replication support
@ -3748,6 +3754,20 @@ EOF
fi
fi
##########################################
# libxml2 probe
if test "$libxml2" != "no" ; then
if $pkg_config --exists libxml-2.0; then
libxml2="yes"
libxml2_cflags=$($pkg_config --cflags libxml-2.0)
libxml2_libs=$($pkg_config --libs libxml-2.0)
else
if test "$libxml2" = "yes"; then
feature_not_found "libxml2" "Install libxml2 devel"
fi
libxml2="no"
fi
fi
##########################################
# glusterfs probe
@ -5630,6 +5650,7 @@ echo "lzo support $lzo"
echo "snappy support $snappy"
echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
echo "libxml2 $libxml2"
echo "tcmalloc support $tcmalloc"
echo "jemalloc support $jemalloc"
echo "avx2 optimization $avx2_opt"
@ -6299,6 +6320,12 @@ if test "$have_rtnetlink" = "yes" ; then
echo "CONFIG_RTNETLINK=y" >> $config_host_mak
fi
if test "$libxml2" = "yes" ; then
echo "CONFIG_LIBXML2=y" >> $config_host_mak
echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak
echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak
fi
if test "$replication" = "yes" ; then
echo "CONFIG_REPLICATION=y" >> $config_host_mak
fi

158
docs/interop/prl-xml.txt Normal file
View file

@ -0,0 +1,158 @@
= License =
Copyright (c) 2015-2017, Virtuozzo, Inc.
Authors:
2015 Denis Lunev <den@openvz.org>
2015 Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2016-2017 Klim Kireev <klim.kireev@virtuozzo.com>
2016-2017 Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
This work is licensed under the terms of the GNU GPL, version 2 or later.
See the COPYING file in the top-level directory.
This specification contains minimal information about Parallels Disk Format,
which is enough to proper work with QEMU. Nevertheless, Parallels Cloud Server
and Parallels Desktop are able to add some unspecified nodes to xml and use
them, but they are for internal work and don't affect functionality. Also it
uses auxiliary xml "Snapshot.xml", which allows to store optional snapshot
information, but it doesn't influence open/read/write functionality. QEMU and
other software should not use fields not covered in this document and
Snapshot.xml file and must leave them as is.
= Parallels Disk Format =
Parallels disk consists of two parts: the set of snapshots and the disk
descriptor file, which stores information about all files and snapshots.
== Definitions ==
Snapshot a record of the contents captured at a particular time,
capable of storing current state. A snapshot has UUID and
parent UUID.
Snapshot image an overlay representing the difference between this
snapshot and some earlier snapshot.
Overlay an image storing the different sectors between two captured
states.
Root image snapshot image with no parent, the root of snapshot tree.
Storage the backing storage for a subset of the virtual disk. When
there is more than one storage in a Parallels disk then that
is referred to as a split image. In this case every storage
covers specific address space area of the disk and has its
particular root image. Split images are not considered here
and are not supported. Each storage consists of disk
parameters and a list of images. The list of images always
contains a root image and may also contain overlays. The
root image can be an expandable Parallels image file or
plain. Overlays must be expandable.
Description DiskDescriptor.xml stores information about disk parameters,
file snapshots, storages.
Top The overlay between actual state and some previous snapshot.
Snapshot It is not a snapshot in the classical sense because it
serves as the active image that the guest writes to.
Sector a 512-byte data chunk.
== Description file ==
All information is placed in a single XML element Parallels_disk_image.
The element has only one attribute "Version", that must be 1.0.
Schema of DiskDescriptor.xml:
<Parallels_disk_image Version="1.0">
<Disk_Parameters>
...
</Disk_Parameters>
<StorageData>
...
</StorageData>
<Snapshots>
...
</Snapshots>
</Parallels_disk_image>
== Disk_Parameters element ==
The Disk_Parameters element describes the physical layout of the virtual disk
and some general settings.
The Disk_Parameters element MUST contain the following child elements:
* Disk_size - number of sectors in the disk,
desired size of the disk.
* Cylinders - number of the disk cylinders.
* Heads - number of the disk heads.
* Sectors - number of the disk sectors per cylinder
(sector size is 512 bytes)
Limitation: Product of the Heads, Sectors and Cylinders
values MUST be equal to the value of the Disk_size parameter.
* Padding - must be 0. Parallels Cloud Server and Parallels Desktop may
use padding set to 1, however this case is not covered
by this spec, QEMU and other software should not open
such disks and should not create them.
== StorageData element ==
This element of the file describes the root image and all snapshot images.
The StorageData element consists of the Storage child element, as shown below:
<StorageData>
<Storage>
...
</Storage>
</StorageData>
A Storage element has following child elements:
* Start - start sector of the storage, in case of non split storage
equals to 0.
* End - number of sector following the last sector, in case of non
split storage equals to Disk_size.
* Blocksize - storage cluster size, number of sectors per one cluster.
Cluster size for each "Compressed" (see below) image in
parallels disk must be equal to this field. Note: cluster
size for Parallels Expandable Image is in 'tracks' field of
its header (see docs/interop/parallels.txt).
* Several Image child elements.
Each Image element has following child elements:
* GUID - image identifier, UUID in curly brackets.
For instance, {12345678-9abc-def1-2345-6789abcdef12}.
The GUID is used by the Snapshots element to reference images
(see below)
* Type - image type of the element. It can be:
"Plain" for raw files.
"Compressed" for expanding disks.
* File - path to image file. Path can be relative to DiskDecriptor.xml or
absolute.
== Snapshots element ==
The Snapshots element describes the snapshot relations with the snapshot tree.
The element contains the set of Shot child elements, as shown below:
<Snapshots>
<TopGUID> ... </TopGUID> /* Optional child element */
<Shot>
...
</Shot>
<Shot>
...
</Shot>
...
</Snapshots>
Each Shot element contains the following child elements:
* GUID - an image GUID.
* ParentGUID - GUID of the image of the parent snapshot.
The software may traverse snapshots from child to parent using <ParentGUID>
field as reference. ParentGUID of root snapshot is
{00000000-0000-0000-0000-000000000000}. There should be only one root
snapshot. Top snapshot could be described via two ways: via TopGUID child
element of the Snapshots element or via predefined GUID
{5fbaabe3-6958-40ff-92a7-860e329aab41}. If TopGUID is defined, predefined GUID is
interpreted as usual GUID. All snapshot images (except Top Snapshot) should be
opened read-only. There is another predefined GUID,
BackupID = {704718e1-2314-44c8-9087-d78ed36b0f4e}, which is used by original and
some third-party software for backup, QEMU and other software may operate with
images with GUID = BackupID as usual, however, it is not recommended to use this
GUID for new disks. Top snapshot cannot have this GUID.

View file

@ -1799,6 +1799,24 @@
# Example:
#
# -> { "execute": "block_set_io_throttle",
# "arguments": { "id": "virtio-blk-pci0/virtio-backend",
# "bps": 0,
# "bps_rd": 0,
# "bps_wr": 0,
# "iops": 512,
# "iops_rd": 0,
# "iops_wr": 0,
# "bps_max": 0,
# "bps_rd_max": 0,
# "bps_wr_max": 0,
# "iops_max": 0,
# "iops_rd_max": 0,
# "iops_wr_max": 0,
# "bps_max_length": 0,
# "iops_size": 0 } }
# <- { "return": {} }
#
# -> { "execute": "block_set_io_throttle",
# "arguments": { "id": "ide0-1-0",
# "bps": 1000000,
# "bps_rd": 0,

View file

@ -265,6 +265,7 @@ our @typeList = (
qr{${Ident}_handler_fn},
qr{target_(?:u)?long},
qr{hwaddr},
qr{xml${Ident}},
);
# This can be modified by sub possible. Since it can be empty, be careful