Merge remote-tracking branch 'kwolf/for-anthony' into staging

* kwolf/for-anthony:
  dataplane: handle misaligned virtio-blk requests
  dataplane: extract virtio-blk read/write processing into do_rdwr_cmd()
  block: make qiov_is_aligned() public
  raw-posix: fix bdrv_aio_ioctl
  sheepdog: implement direct write semantics
  block: do not probe zero-sized disks

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Anthony Liguori 2013-01-14 10:26:26 -06:00
commit da758bd7a3
5 changed files with 120 additions and 68 deletions

18
block.c
View file

@ -527,7 +527,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
int ret = 0;
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
if (bs->sg || !bdrv_is_inserted(bs)) {
if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
drv = bdrv_find_format("raw");
if (!drv) {
ret = -ENOENT;
@ -4313,6 +4313,22 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
}
/*
* Check if all memory in this vector is sector aligned.
*/
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
return false;
}
}
return true;
}
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
int64_t bitmap_size;

View file

@ -430,22 +430,6 @@ static void raw_reopen_abort(BDRVReopenState *state)
#endif
*/
/*
* Check if all memory in this vector is sector aligned.
*/
static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
return 0;
}
}
return 1;
}
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
int ret;
@ -455,15 +439,7 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
return -errno;
}
/*
* This looks weird, but the aio code only considers a request
* successful if it has written the full number of bytes.
*
* Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
* so in fact we return the ioctl command here to make posix_aio_read()
* happy..
*/
return aiocb->aio_nbytes;
return 0;
}
static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
@ -722,7 +698,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
* driver that it needs to copy the buffer.
*/
if ((bs->open_flags & BDRV_O_NOCACHE)) {
if (!qiov_is_aligned(bs, qiov)) {
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {

View file

@ -36,7 +36,8 @@
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
#define SD_FLAG_CMD_CACHE 0x04
#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */
#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
@ -293,7 +294,7 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
bool cache_enabled;
uint32_t cache_flags;
char *addr;
char *port;
@ -977,8 +978,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
}
if (s->cache_enabled) {
hdr.flags |= SD_FLAG_CMD_CACHE;
if (s->cache_flags) {
hdr.flags |= s->cache_flags;
}
hdr.oid = oid;
@ -1023,7 +1024,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset,
bool write, bool create, bool cache)
bool write, bool create, uint32_t cache_flags)
{
SheepdogObjReq hdr;
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@ -1047,9 +1048,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
hdr.opcode = SD_OP_READ_OBJ;
}
if (cache) {
hdr.flags |= SD_FLAG_CMD_CACHE;
}
hdr.flags |= cache_flags;
hdr.oid = oid;
hdr.data_length = datalen;
@ -1072,18 +1071,19 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
static int read_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset, bool cache)
unsigned int datalen, uint64_t offset,
uint32_t cache_flags)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, false,
false, cache);
false, cache_flags);
}
static int write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset, bool create,
bool cache)
uint32_t cache_flags)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, true,
create, cache);
create, cache_flags);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@ -1118,12 +1118,22 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
s->cache_enabled = true;
s->flush_fd = connect_to_sdog(s->addr, s->port);
if (s->flush_fd < 0) {
error_report("failed to connect");
ret = s->flush_fd;
goto out;
/*
* QEMU block layer emulates writethrough cache as 'writeback + flush', so
* we always set SD_FLAG_CMD_CACHE (writeback cache) as default.
*/
s->cache_flags = SD_FLAG_CMD_CACHE;
if (flags & BDRV_O_NOCACHE) {
s->cache_flags = SD_FLAG_CMD_DIRECT;
}
if (s->cache_flags == SD_FLAG_CMD_CACHE) {
s->flush_fd = connect_to_sdog(s->addr, s->port);
if (s->flush_fd < 0) {
error_report("failed to connect");
ret = s->flush_fd;
goto out;
}
}
if (snapid || tag[0] != '\0') {
@ -1140,7 +1150,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
s->cache_enabled);
s->cache_flags);
closesocket(fd);
@ -1387,7 +1397,7 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd);
if (s->cache_enabled) {
if (s->cache_flags) {
closesocket(s->flush_fd);
}
g_free(s->addr);
@ -1423,7 +1433,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
s->inode.nr_copies, datalen, 0, false, s->cache_flags);
close(fd);
if (ret < 0) {
@ -1506,7 +1516,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
}
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0, s->cache_enabled);
SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@ -1707,7 +1717,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
int ret;
unsigned int wlen = 0, rlen = 0;
if (!s->cache_enabled) {
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
return 0;
}
@ -1723,7 +1733,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
if (rsp->result == SD_RES_INVALID_PARMS) {
dprintf("disable write cache since the server doesn't support it\n");
s->cache_enabled = false;
s->cache_flags = SD_FLAG_CMD_DIRECT;
closesocket(s->flush_fd);
return 0;
}
@ -1774,7 +1784,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
s->inode.nr_copies, datalen, 0, false, s->cache_flags);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
goto cleanup;
@ -1791,7 +1801,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
inode = (SheepdogInode *)g_malloc(datalen);
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
s->inode.nr_copies, datalen, 0, s->cache_enabled);
s->inode.nr_copies, datalen, 0, s->cache_flags);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@ -1845,7 +1855,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0, s->cache_enabled);
SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@ -1942,7 +1952,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
/* we don't need to read entire object */
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
s->cache_enabled);
s->cache_flags);
if (ret) {
continue;
@ -2003,11 +2013,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
if (load) {
ret = read_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset,
s->cache_enabled);
s->cache_flags);
} else {
ret = write_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset, create,
s->cache_enabled);
s->cache_flags);
}
if (ret < 0) {

View file

@ -34,6 +34,8 @@ typedef struct {
struct iocb iocb; /* Linux AIO control block */
QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */
unsigned int head; /* vring descriptor index */
struct iovec *bounce_iov; /* used if guest buffers are unaligned */
QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */
} VirtIOBlockRequest;
struct VirtIOBlockDataPlane {
@ -89,6 +91,18 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
if (req->read_qiov) {
assert(req->bounce_iov);
qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
qemu_iovec_destroy(req->read_qiov);
g_slice_free(QEMUIOVector, req->read_qiov);
}
if (req->bounce_iov) {
qemu_vfree(req->bounce_iov->iov_base);
g_slice_free(struct iovec, req->bounce_iov);
}
qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
qemu_iovec_destroy(req->inhdr);
g_slice_free(QEMUIOVector, req->inhdr);
@ -130,6 +144,48 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s,
complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
}
static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
struct iovec *iov, unsigned int iov_cnt,
long long offset, unsigned int head,
QEMUIOVector *inhdr)
{
struct iocb *iocb;
QEMUIOVector qiov;
struct iovec *bounce_iov = NULL;
QEMUIOVector *read_qiov = NULL;
qemu_iovec_init_external(&qiov, iov, iov_cnt);
if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
if (read) {
/* Need to copy back from bounce buffer on completion */
read_qiov = g_slice_new(QEMUIOVector);
qemu_iovec_init(read_qiov, iov_cnt);
qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
} else {
qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
}
/* Redirect I/O to aligned bounce buffer */
bounce_iov = g_slice_new(struct iovec);
bounce_iov->iov_base = bounce_buffer;
bounce_iov->iov_len = qiov.size;
iov = bounce_iov;
iov_cnt = 1;
}
iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
/* Fill in virtio block metadata needed for completion */
VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
req->head = head;
req->inhdr = inhdr;
req->bounce_iov = bounce_iov;
req->read_qiov = read_qiov;
return 0;
}
static int process_request(IOQueue *ioq, struct iovec iov[],
unsigned int out_num, unsigned int in_num,
unsigned int head)
@ -139,7 +195,6 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
struct virtio_blk_outhdr outhdr;
QEMUIOVector *inhdr;
size_t in_size;
struct iocb *iocb;
/* Copy in outhdr */
if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
@ -167,12 +222,12 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
switch (outhdr.type) {
case VIRTIO_BLK_T_IN:
iocb = ioq_rdwr(ioq, true, in_iov, in_num, outhdr.sector * 512);
break;
do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr);
return 0;
case VIRTIO_BLK_T_OUT:
iocb = ioq_rdwr(ioq, false, iov, out_num, outhdr.sector * 512);
break;
do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr);
return 0;
case VIRTIO_BLK_T_SCSI_CMD:
/* TODO support SCSI commands */
@ -198,12 +253,6 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
g_slice_free(QEMUIOVector, inhdr);
return -EFAULT;
}
/* Fill in virtio block metadata needed for completion */
VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
req->head = head;
req->inhdr = inhdr;
return 0;
}
static void handle_notify(EventHandler *handler)

View file

@ -349,6 +349,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048