From 1dde716ed6719c341c1bfa427781f0715af90cbc Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 21 Feb 2013 16:15:54 +0100 Subject: [PATCH 1/6] iscsi: retry read, write, flush and unmap on unit attention check conditions the storage might return a check condition status for various reasons. (e.g. bus reset, capacity change, thin-provisioning info etc.) currently all these informative status responses lead to an I/O error which is populated to the guest. this patch introduces a retry mechanism to avoid this. Signed-off-by: Peter Lieven Signed-off-by: Paolo Bonzini --- block/iscsi.c | 291 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 203 insertions(+), 88 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index deb3b68890..439af6f217 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -60,8 +60,11 @@ typedef struct IscsiAIOCB { uint8_t *buf; int status; int canceled; + int retries; size_t read_size; size_t read_offset; + int64_t sector_num; + int nb_sectors; #ifdef __linux__ sg_io_hdr_t *ioh; #endif @@ -69,6 +72,7 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 +#define ISCSI_CMD_RETRIES 5 static void iscsi_bh_cb(void *p) @@ -191,6 +195,8 @@ iscsi_process_write(void *arg) iscsi_set_events(iscsilun); } +static int +iscsi_aio_writev_acb(IscsiAIOCB *acb); static void iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status, @@ -208,7 +214,19 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status, } acb->status = 0; - if (status < 0) { + if (status != 0) { + if (status == SCSI_STATUS_CHECK_CONDITION + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION + && acb->retries-- > 0) { + if (acb->task != NULL) { + scsi_free_scsi_task(acb->task); + acb->task = NULL; + } + if (iscsi_aio_writev_acb(acb) == 0) { + iscsi_set_events(acb->iscsilun); + return; + } + } error_report("Failed to write16 data to iSCSI lun. %s", iscsi_get_error(iscsi)); acb->status = -EIO; @@ -222,15 +240,10 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun) return sector * BDRV_SECTOR_SIZE / iscsilun->block_size; } -static BlockDriverAIOCB * -iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque) +static int +iscsi_aio_writev_acb(IscsiAIOCB *acb) { - IscsiLun *iscsilun = bs->opaque; - struct iscsi_context *iscsi = iscsilun->iscsi; - IscsiAIOCB *acb; + struct iscsi_context *iscsi = acb->iscsilun->iscsi; size_t size; uint32_t num_sectors; uint64_t lba; @@ -239,19 +252,13 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, #endif int ret; - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); - trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb); - - acb->iscsilun = iscsilun; - acb->qiov = qiov; - acb->canceled = 0; acb->bh = NULL; acb->status = -EINPROGRESS; acb->buf = NULL; /* this will allow us to get rid of 'buf' completely */ - size = nb_sectors * BDRV_SECTOR_SIZE; + size = acb->nb_sectors * BDRV_SECTOR_SIZE; #if !defined(LIBISCSI_FEATURE_IOVECTOR) data.size = MIN(size, acb->qiov->size); @@ -270,48 +277,76 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, if (acb->task == NULL) { error_report("iSCSI: Failed to allocate task for scsi WRITE16 " "command. %s", iscsi_get_error(iscsi)); - qemu_aio_release(acb); - return NULL; + return -1; } memset(acb->task, 0, sizeof(struct scsi_task)); acb->task->xfer_dir = SCSI_XFER_WRITE; acb->task->cdb_size = 16; acb->task->cdb[0] = 0x8a; - lba = sector_qemu2lun(sector_num, iscsilun); + lba = sector_qemu2lun(acb->sector_num, acb->iscsilun); *(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32); *(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff); - num_sectors = size / iscsilun->block_size; + num_sectors = size / acb->iscsilun->block_size; *(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors); acb->task->expxferlen = size; #if defined(LIBISCSI_FEATURE_IOVECTOR) - ret = iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task, + ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task, iscsi_aio_write16_cb, NULL, acb); #else - ret = iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task, + ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task, iscsi_aio_write16_cb, &data, acb); #endif if (ret != 0) { - scsi_free_scsi_task(acb->task); g_free(acb->buf); - qemu_aio_release(acb); - return NULL; + return -1; } #if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_out(acb->task, (struct scsi_iovec*) acb->qiov->iov, acb->qiov->niov); #endif - iscsi_set_events(iscsilun); + return 0; +} +static BlockDriverAIOCB * +iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + IscsiLun *iscsilun = bs->opaque; + IscsiAIOCB *acb; + + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); + trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb); + + acb->iscsilun = iscsilun; + acb->qiov = qiov; + acb->nb_sectors = nb_sectors; + acb->sector_num = sector_num; + acb->retries = ISCSI_CMD_RETRIES; + + if (iscsi_aio_writev_acb(acb) != 0) { + if (acb->task) { + scsi_free_scsi_task(acb->task); + } + qemu_aio_release(acb); + return NULL; + } + + iscsi_set_events(iscsilun); return &acb->common; } +static int +iscsi_aio_readv_acb(IscsiAIOCB *acb); + static void iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -326,6 +361,18 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status, acb->status = 0; if (status != 0) { + if (status == SCSI_STATUS_CHECK_CONDITION + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION + && acb->retries-- > 0) { + if (acb->task != NULL) { + scsi_free_scsi_task(acb->task); + acb->task = NULL; + } + if (iscsi_aio_readv_acb(acb) == 0) { + iscsi_set_events(acb->iscsilun); + return; + } + } error_report("Failed to read16 data from iSCSI lun. %s", iscsi_get_error(iscsi)); acb->status = -EIO; @@ -334,35 +381,20 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status, iscsi_schedule_bh(acb); } -static BlockDriverAIOCB * -iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, - void *opaque) +static int +iscsi_aio_readv_acb(IscsiAIOCB *acb) { - IscsiLun *iscsilun = bs->opaque; - struct iscsi_context *iscsi = iscsilun->iscsi; - IscsiAIOCB *acb; - size_t qemu_read_size; + struct iscsi_context *iscsi = acb->iscsilun->iscsi; + uint64_t lba; + uint32_t num_sectors; + int ret; #if !defined(LIBISCSI_FEATURE_IOVECTOR) int i; #endif - int ret; - uint64_t lba; - uint32_t num_sectors; - - qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors; - - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); - trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb); - - acb->iscsilun = iscsilun; - acb->qiov = qiov; acb->canceled = 0; acb->bh = NULL; acb->status = -EINPROGRESS; - acb->read_size = qemu_read_size; acb->buf = NULL; /* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU @@ -370,30 +402,29 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, * data. */ acb->read_offset = 0; - if (iscsilun->block_size > BDRV_SECTOR_SIZE) { - uint64_t bdrv_offset = BDRV_SECTOR_SIZE * sector_num; + if (acb->iscsilun->block_size > BDRV_SECTOR_SIZE) { + uint64_t bdrv_offset = BDRV_SECTOR_SIZE * acb->sector_num; - acb->read_offset = bdrv_offset % iscsilun->block_size; + acb->read_offset = bdrv_offset % acb->iscsilun->block_size; } - num_sectors = (qemu_read_size + iscsilun->block_size + num_sectors = (acb->read_size + acb->iscsilun->block_size + acb->read_offset - 1) - / iscsilun->block_size; + / acb->iscsilun->block_size; acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { error_report("iSCSI: Failed to allocate task for scsi READ16 " "command. %s", iscsi_get_error(iscsi)); - qemu_aio_release(acb); - return NULL; + return -1; } memset(acb->task, 0, sizeof(struct scsi_task)); acb->task->xfer_dir = SCSI_XFER_READ; - lba = sector_qemu2lun(sector_num, iscsilun); - acb->task->expxferlen = qemu_read_size; + lba = sector_qemu2lun(acb->sector_num, acb->iscsilun); + acb->task->expxferlen = acb->read_size; - switch (iscsilun->type) { + switch (acb->iscsilun->type) { case TYPE_DISK: acb->task->cdb_size = 16; acb->task->cdb[0] = 0x88; @@ -409,14 +440,12 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, break; } - ret = iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task, + ret = iscsi_scsi_command_async(iscsi, acb->iscsilun->lun, acb->task, iscsi_aio_read16_cb, NULL, acb); if (ret != 0) { - scsi_free_scsi_task(acb->task); - qemu_aio_release(acb); - return NULL; + return -1; } #if defined(LIBISCSI_FEATURE_IOVECTOR) @@ -428,12 +457,42 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, acb->qiov->iov[i].iov_base); } #endif + return 0; +} + +static BlockDriverAIOCB * +iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + IscsiLun *iscsilun = bs->opaque; + IscsiAIOCB *acb; + + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); + trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb); + + acb->nb_sectors = nb_sectors; + acb->sector_num = sector_num; + acb->iscsilun = iscsilun; + acb->qiov = qiov; + acb->read_size = BDRV_SECTOR_SIZE * (size_t)acb->nb_sectors; + acb->retries = ISCSI_CMD_RETRIES; + + if (iscsi_aio_readv_acb(acb) != 0) { + if (acb->task) { + scsi_free_scsi_task(acb->task); + } + qemu_aio_release(acb); + return NULL; + } iscsi_set_events(iscsilun); - return &acb->common; } +static int +iscsi_aio_flush_acb(IscsiAIOCB *acb); static void iscsi_synccache10_cb(struct iscsi_context *iscsi, int status, @@ -446,7 +505,19 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi, int status, } acb->status = 0; - if (status < 0) { + if (status != 0) { + if (status == SCSI_STATUS_CHECK_CONDITION + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION + && acb->retries-- > 0) { + if (acb->task != NULL) { + scsi_free_scsi_task(acb->task); + acb->task = NULL; + } + if (iscsi_aio_flush_acb(acb) == 0) { + iscsi_set_events(acb->iscsilun); + return; + } + } error_report("Failed to sync10 data on iSCSI lun. %s", iscsi_get_error(iscsi)); acb->status = -EIO; @@ -455,29 +526,43 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi, int status, iscsi_schedule_bh(acb); } -static BlockDriverAIOCB * -iscsi_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) +static int +iscsi_aio_flush_acb(IscsiAIOCB *acb) { - IscsiLun *iscsilun = bs->opaque; - struct iscsi_context *iscsi = iscsilun->iscsi; - IscsiAIOCB *acb; + struct iscsi_context *iscsi = acb->iscsilun->iscsi; - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); - - acb->iscsilun = iscsilun; acb->canceled = 0; acb->bh = NULL; acb->status = -EINPROGRESS; acb->buf = NULL; - acb->task = iscsi_synchronizecache10_task(iscsi, iscsilun->lun, + acb->task = iscsi_synchronizecache10_task(iscsi, acb->iscsilun->lun, 0, 0, 0, 0, iscsi_synccache10_cb, acb); if (acb->task == NULL) { error_report("iSCSI: Failed to send synchronizecache10 command. %s", iscsi_get_error(iscsi)); + return -1; + } + + return 0; +} + +static BlockDriverAIOCB * +iscsi_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + IscsiLun *iscsilun = bs->opaque; + + IscsiAIOCB *acb; + + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); + + acb->iscsilun = iscsilun; + acb->retries = ISCSI_CMD_RETRIES; + + if (iscsi_aio_flush_acb(acb) != 0) { qemu_aio_release(acb); return NULL; } @@ -487,6 +572,8 @@ iscsi_aio_flush(BlockDriverState *bs, return &acb->common; } +static int iscsi_aio_discard_acb(IscsiAIOCB *acb); + static void iscsi_unmap_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -498,7 +585,19 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int status, } acb->status = 0; - if (status < 0) { + if (status != 0) { + if (status == SCSI_STATUS_CHECK_CONDITION + && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION + && acb->retries-- > 0) { + if (acb->task != NULL) { + scsi_free_scsi_task(acb->task); + acb->task = NULL; + } + if (iscsi_aio_discard_acb(acb) == 0) { + iscsi_set_events(acb->iscsilun); + return; + } + } error_report("Failed to unmap data on iSCSI lun. %s", iscsi_get_error(iscsi)); acb->status = -EIO; @@ -507,34 +606,50 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int status, iscsi_schedule_bh(acb); } -static BlockDriverAIOCB * -iscsi_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - IscsiLun *iscsilun = bs->opaque; - struct iscsi_context *iscsi = iscsilun->iscsi; - IscsiAIOCB *acb; +static int iscsi_aio_discard_acb(IscsiAIOCB *acb) { + struct iscsi_context *iscsi = acb->iscsilun->iscsi; struct unmap_list list[1]; - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); - - acb->iscsilun = iscsilun; acb->canceled = 0; acb->bh = NULL; acb->status = -EINPROGRESS; acb->buf = NULL; - list[0].lba = sector_qemu2lun(sector_num, iscsilun); - list[0].num = nb_sectors * BDRV_SECTOR_SIZE / iscsilun->block_size; + list[0].lba = sector_qemu2lun(acb->sector_num, acb->iscsilun); + list[0].num = acb->nb_sectors * BDRV_SECTOR_SIZE / acb->iscsilun->block_size; - acb->task = iscsi_unmap_task(iscsi, iscsilun->lun, + acb->task = iscsi_unmap_task(iscsi, acb->iscsilun->lun, 0, 0, &list[0], 1, iscsi_unmap_cb, acb); if (acb->task == NULL) { error_report("iSCSI: Failed to send unmap command. %s", iscsi_get_error(iscsi)); + return -1; + } + + return 0; +} + +static BlockDriverAIOCB * +iscsi_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + IscsiLun *iscsilun = bs->opaque; + IscsiAIOCB *acb; + + acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); + + acb->iscsilun = iscsilun; + acb->nb_sectors = nb_sectors; + acb->sector_num = sector_num; + acb->retries = ISCSI_CMD_RETRIES; + + if (iscsi_aio_discard_acb(acb) != 0) { + if (acb->task) { + scsi_free_scsi_task(acb->task); + } qemu_aio_release(acb); return NULL; } From cb1b83e740384b4e0d950f3d7c81c02b8ce86c2e Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 18 Feb 2013 14:50:46 +0100 Subject: [PATCH 2/6] iscsi: add iscsi_truncate support this patch adds iscsi_truncate which effectively allows for online resizing of iscsi volumes. for this to work you have to resize the volume on your storage and then call block_resize command in qemu which will issue a readcapacity16 to update the capacity. v4: - factor out complete readcapacity logic into a separate function - handle capacity change check condition in readcapacity function (this happens if the block_resize cmd is the first iscsi task executed after a resize on the storage) v3: - remove switch statement in iscsi_open - create separate patch for brdv_drain_all() in bdrv_truncate() v2: - add a general bdrv_drain_all() before bdrv_truncate() to avoid in-flight AIOs while the device is truncated - since no AIOs are in flight we can use a sync libiscsi call to re-read the capacity - factor out the readcapacity16 logic as it is redundant to iscsi_open() and iscsi_truncate(). Signed-off-by: Peter Lieven [allow any type of unit attention check condition in iscsi_readcapacity_sync(), as in Message-ID: <51263A2A.6070304@dlhnet.de> - Paolo] Signed-off-by: Paolo Bonzini --- block/iscsi.c | 135 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 47 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 439af6f217..3d529213ff 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -938,6 +938,71 @@ static void iscsi_nop_timed_event(void *opaque) } #endif +static int iscsi_readcapacity_sync(IscsiLun *iscsilun) +{ + struct scsi_task *task = NULL; + struct scsi_readcapacity10 *rc10 = NULL; + struct scsi_readcapacity16 *rc16 = NULL; + int ret = 0; + int retries = ISCSI_CMD_RETRIES; + +try_again: + switch (iscsilun->type) { + case TYPE_DISK: + task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun); + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION + && retries-- > 0) { + scsi_free_scsi_task(task); + goto try_again; + } + error_report("iSCSI: failed to send readcapacity16 command."); + ret = -EINVAL; + goto out; + } + rc16 = scsi_datain_unmarshall(task); + if (rc16 == NULL) { + error_report("iSCSI: Failed to unmarshall readcapacity16 data."); + ret = -EINVAL; + goto out; + } + iscsilun->block_size = rc16->block_length; + iscsilun->num_blocks = rc16->returned_lba + 1; + break; + case TYPE_ROM: + task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0); + if (task == NULL || task->status != SCSI_STATUS_GOOD) { + error_report("iSCSI: failed to send readcapacity10 command."); + ret = -EINVAL; + goto out; + } + rc10 = scsi_datain_unmarshall(task); + if (rc10 == NULL) { + error_report("iSCSI: Failed to unmarshall readcapacity10 data."); + ret = -EINVAL; + goto out; + } + iscsilun->block_size = rc10->block_size; + if (rc10->lba == 0) { + /* blank disk loaded */ + iscsilun->num_blocks = 0; + } else { + iscsilun->num_blocks = rc10->lba + 1; + } + break; + default: + break; + } + +out: + if (task) { + scsi_free_scsi_task(task); + } + + return ret; +} + /* * We support iscsi url's on the form * iscsi://[%@][:]// @@ -949,8 +1014,6 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) struct iscsi_url *iscsi_url = NULL; struct scsi_task *task = NULL; struct scsi_inquiry_standard *inq = NULL; - struct scsi_readcapacity10 *rc10 = NULL; - struct scsi_readcapacity16 *rc16 = NULL; char *initiator_name = NULL; int ret; @@ -1040,50 +1103,9 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) iscsilun->type = inq->periperal_device_type; - scsi_free_scsi_task(task); - - switch (iscsilun->type) { - case TYPE_DISK: - task = iscsi_readcapacity16_sync(iscsi, iscsilun->lun); - if (task == NULL || task->status != SCSI_STATUS_GOOD) { - error_report("iSCSI: failed to send readcapacity16 command."); - ret = -EINVAL; - goto out; - } - rc16 = scsi_datain_unmarshall(task); - if (rc16 == NULL) { - error_report("iSCSI: Failed to unmarshall readcapacity16 data."); - ret = -EINVAL; - goto out; - } - iscsilun->block_size = rc16->block_length; - iscsilun->num_blocks = rc16->returned_lba + 1; - break; - case TYPE_ROM: - task = iscsi_readcapacity10_sync(iscsi, iscsilun->lun, 0, 0); - if (task == NULL || task->status != SCSI_STATUS_GOOD) { - error_report("iSCSI: failed to send readcapacity10 command."); - ret = -EINVAL; - goto out; - } - rc10 = scsi_datain_unmarshall(task); - if (rc10 == NULL) { - error_report("iSCSI: Failed to unmarshall readcapacity10 data."); - ret = -EINVAL; - goto out; - } - iscsilun->block_size = rc10->block_size; - if (rc10->lba == 0) { - /* blank disk loaded */ - iscsilun->num_blocks = 0; - } else { - iscsilun->num_blocks = rc10->lba + 1; - } - break; - default: - break; + if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) { + goto out; } - bs->total_sectors = iscsilun->num_blocks * iscsilun->block_size / BDRV_SECTOR_SIZE ; @@ -1096,8 +1118,6 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) bs->sg = 1; } - ret = 0; - #if defined(LIBISCSI_FEATURE_NOP_COUNTER) /* Set up a timer for sending out iSCSI NOPs */ iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun); @@ -1138,6 +1158,26 @@ static void iscsi_close(BlockDriverState *bs) memset(iscsilun, 0, sizeof(IscsiLun)); } +static int iscsi_truncate(BlockDriverState *bs, int64_t offset) +{ + IscsiLun *iscsilun = bs->opaque; + int ret = 0; + + if (iscsilun->type != TYPE_DISK) { + return -ENOTSUP; + } + + if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) { + return ret; + } + + if (offset > iscsi_getlength(bs)) { + return -EINVAL; + } + + return 0; +} + static int iscsi_has_zero_init(BlockDriverState *bs) { return 0; @@ -1208,6 +1248,7 @@ static BlockDriver bdrv_iscsi = { .create_options = iscsi_create_options, .bdrv_getlength = iscsi_getlength, + .bdrv_truncate = iscsi_truncate, .bdrv_aio_readv = iscsi_aio_readv, .bdrv_aio_writev = iscsi_aio_writev, From 3c33ea9640758bb625e110a77673e5abfd184e54 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 18:14:28 +0100 Subject: [PATCH 3/6] iscsi: look for pkg-config file too Due to library conflicts, Fedora will have to put libiscsi in /usr/lib/iscsi. Simplify configuration by using a pkg-config file. The Fedora package will distribute one, and the patch to add it has been sent to upstream libiscsi as well. Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- configure | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 2f98c5a242..a9a7c9971b 100755 --- a/configure +++ b/configure @@ -2803,7 +2803,13 @@ if test "$libiscsi" != "no" ; then #include int main(void) { iscsi_unmap_sync(NULL,0,0,0,NULL,0); return 0; } EOF - if compile_prog "" "-liscsi" ; then + if $pkg_config --atleast-version=1.7.0 libiscsi --modversion >/dev/null 2>&1; then + libiscsi="yes" + libiscsi_cflags=$($pkg_config --cflags libiscsi 2>/dev/null) + libiscsi_libs=$($pkg_config --libs libiscsi 2>/dev/null) + CFLAGS="$CFLAGS $libiscsi_cflags" + LIBS="$LIBS $libiscsi_libs" + elif compile_prog "" "-liscsi" ; then libiscsi="yes" LIBS="$LIBS -liscsi" else From 6f6710aa99ac53b59ff0f14380830cb9ab6bdc14 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Feb 2013 12:12:58 +0100 Subject: [PATCH 4/6] scsi: do not call scsi_read_data/scsi_write_data for a canceled request Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi-bus.c | 4 ++++ trace-events | 1 + 2 files changed, 5 insertions(+) diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c index a97f1cdc1c..01e1dec4ca 100644 --- a/hw/scsi-bus.c +++ b/hw/scsi-bus.c @@ -1508,6 +1508,10 @@ void scsi_req_unref(SCSIRequest *req) will start the next chunk or complete the command. */ void scsi_req_continue(SCSIRequest *req) { + if (req->io_canceled) { + trace_scsi_req_continue_canceled(req->dev->id, req->lun, req->tag); + return; + } trace_scsi_req_continue(req->dev->id, req->lun, req->tag); if (req->cmd.mode == SCSI_XFER_TO_DEV) { req->ops->write_data(req); diff --git a/trace-events b/trace-events index a27ae43032..3064fc7767 100644 --- a/trace-events +++ b/trace-events @@ -460,6 +460,7 @@ scsi_req_data(int target, int lun, int tag, int len) "target %d lun %d tag %d le scsi_req_data_canceled(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d" scsi_req_dequeue(int target, int lun, int tag) "target %d lun %d tag %d" scsi_req_continue(int target, int lun, int tag) "target %d lun %d tag %d" +scsi_req_continue_canceled(int target, int lun, int tag) "target %d lun %d tag %d" scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer) "target %d lun %d tag %d command %d dir %d length %d" scsi_req_parsed_lba(int target, int lun, int tag, int cmd, uint64_t lba) "target %d lun %d tag %d command %d lba %"PRIu64 scsi_req_parse_bad(int target, int lun, int tag, int cmd) "target %d lun %d tag %d command %d" From d0242eadc5bba4f3abe34bc5d536bbfb81aa9891 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Feb 2013 12:14:34 +0100 Subject: [PATCH 5/6] scsi-disk: do not complete canceled UNMAP requests Canceled requests should never be completed, and doing that could cause accesses to a NULL hba_private field. Cc: qemu-stable@nongnu.org Reported-by: Stefan Priebe Tested-by: Stefan Priebe Signed-off-by: Paolo Bonzini --- hw/scsi-disk.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index d41158693e..6c0ddff2fb 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -1478,13 +1478,17 @@ static void scsi_unmap_complete(void *opaque, int ret) uint32_t nb_sectors; r->req.aiocb = NULL; + if (r->req.io_canceled) { + goto done; + } + if (ret < 0) { if (scsi_handle_rw_error(r, -ret)) { goto done; } } - if (data->count > 0 && !r->req.io_canceled) { + if (data->count > 0) { sector_num = ldq_be_p(&data->inbuf[0]); nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL; if (!check_lba_range(s, sector_num, nb_sectors)) { @@ -1501,10 +1505,9 @@ static void scsi_unmap_complete(void *opaque, int ret) return; } + scsi_req_complete(&r->req, GOOD); + done: - if (data->count == 0) { - scsi_req_complete(&r->req, GOOD); - } if (!r->req.io_canceled) { scsi_req_unref(&r->req); } From 0c92e0e6b64c9061f7365a2712b9055ea35b52f9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Feb 2013 12:16:05 +0100 Subject: [PATCH 6/6] scsi-disk: handle io_canceled uniformly and correctly Always check it immediately after calling bdrv_acct_done, and always do a "goto done" in case the "done" label has to free some memory---as is the case for scsi_unmap_complete in the previous patch. This patch could fix problems that happen when a request is split into multiple parts, and one of them is canceled. Then the next part is fired, but the HBA's cancellation callbacks have fired already. Whether this happens or not, depends on how the block/ driver implements AIO cancellation. It it does a simple bdrv_drain_all() or similar, then it will not have a problem. If it only cancels the given AIOCB, this scenario could happen. Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi-disk.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 6c0ddff2fb..4a0673c0bf 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -178,6 +178,9 @@ static void scsi_aio_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; bdrv_acct_done(s->qdev.conf.bs, &r->acct); + if (r->req.io_canceled) { + goto done; + } if (ret < 0) { if (scsi_handle_rw_error(r, -ret)) { @@ -223,6 +226,10 @@ static void scsi_write_do_fua(SCSIDiskReq *r) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + if (r->req.io_canceled) { + goto done; + } + if (scsi_is_cmd_fua(&r->req.cmd)) { bdrv_acct_start(s->qdev.conf.bs, &r->acct, 0, BDRV_ACCT_FLUSH); r->req.aiocb = bdrv_aio_flush(s->qdev.conf.bs, scsi_aio_complete, r); @@ -230,6 +237,8 @@ static void scsi_write_do_fua(SCSIDiskReq *r) } scsi_req_complete(&r->req, GOOD); + +done: if (!r->req.io_canceled) { scsi_req_unref(&r->req); } @@ -243,6 +252,9 @@ static void scsi_dma_complete(void *opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; bdrv_acct_done(s->qdev.conf.bs, &r->acct); + if (r->req.io_canceled) { + goto done; + } if (ret < 0) { if (scsi_handle_rw_error(r, -ret)) { @@ -274,6 +286,9 @@ static void scsi_read_complete(void * opaque, int ret) assert(r->req.aiocb != NULL); r->req.aiocb = NULL; bdrv_acct_done(s->qdev.conf.bs, &r->acct); + if (r->req.io_canceled) { + goto done; + } if (ret < 0) { if (scsi_handle_rw_error(r, -ret)) { @@ -305,6 +320,9 @@ static void scsi_do_read(void *opaque, int ret) r->req.aiocb = NULL; bdrv_acct_done(s->qdev.conf.bs, &r->acct); } + if (r->req.io_canceled) { + goto done; + } if (ret < 0) { if (scsi_handle_rw_error(r, -ret)) { @@ -312,10 +330,6 @@ static void scsi_do_read(void *opaque, int ret) } } - if (r->req.io_canceled) { - return; - } - /* The request is used as the AIO opaque value, so add a ref. */ scsi_req_ref(&r->req); @@ -423,6 +437,9 @@ static void scsi_write_complete(void * opaque, int ret) r->req.aiocb = NULL; bdrv_acct_done(s->qdev.conf.bs, &r->acct); } + if (r->req.io_canceled) { + goto done; + } if (ret < 0) { if (scsi_handle_rw_error(r, -ret)) {