From c2ebb05e25dcb3acbcdaa541234746151d0ff6b1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Sun, 14 Sep 2014 20:29:59 +0100 Subject: [PATCH 01/59] block/vhdx.c: Mark parent_vhdx_guid variable as unused The parent_vhdx_guid variable is defined but never used, which provokes complaints from newer versions of clang. Since the variable definition is here acting as documentation of the image format, mark it with the 'unused' attribute to keep the compiler happy rather than simply deleting it. Signed-off-by: Peter Maydell Reviewed-by: Fam Zheng Reviewed-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/vhdx.c b/block/vhdx.c index 796b7bd884..d9aa2c14ed 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -99,7 +99,8 @@ static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d, /* Each parent type must have a valid GUID; this is for parent images * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would * need to make up our own QCOW2 GUID type */ -static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7, +static const MSGUID parent_vhdx_guid __attribute__((unused)) + = { .data1 = 0xb04aefb7, .data2 = 0xd19e, .data3 = 0x4a81, .data4 = { 0xb7, 0x89, 0x25, 0xb8, From d735b620b58f2fdfddc8e641e9feac3c9671a49d Mon Sep 17 00:00:00 2001 From: John Snow Date: Fri, 12 Sep 2014 23:51:12 -0400 Subject: [PATCH 02/59] ide/atapi: Mark non-data commands as complete When the command completion code in IDE and AHCI was unified to put all command completion inside of a callback, "cmd_done," we neglected to ensure that all AHCI/ATAPI command paths would eventually register as finished. for the PCI interface to IDE this is not a problem because cmd_done is a nop, but the AHCI implementation needs to send a D2H_REG_FIS and interrupt back to the guest to inform of completion. This patch adds calls to ide_stop_transfer, which calls ide_cmd_done, inside of ide_atapi_cmd_ok and ide_atapi_cmd_error. This fixes regressions observed by trying to boot QEMU with a Fedora 20 live CD under Q35/AHCI, which uses ATAPI command 0x00, which is a status check that may cause a hang because we never complete, and ATAPI command 0x56, which is unsupported by our current implementation and results in an error that we never report back to the guest. Signed-off-by: John Snow Reviewed-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- hw/ide/atapi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 6d52cda4cc..10218dfa3a 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -136,6 +136,7 @@ void ide_atapi_cmd_ok(IDEState *s) s->error = 0; s->status = READY_STAT | SEEK_STAT; s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; + ide_transfer_stop(s); ide_set_irq(s->bus); } @@ -149,6 +150,7 @@ void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc) s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; s->sense_key = sense_key; s->asc = asc; + ide_transfer_stop(s); ide_set_irq(s->bus); } @@ -176,9 +178,7 @@ void ide_atapi_cmd_reply_end(IDEState *s) #endif if (s->packet_transfer_size <= 0) { /* end of transfer */ - s->status = READY_STAT | SEEK_STAT; - s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD; - ide_transfer_stop(s); + ide_atapi_cmd_ok(s); ide_set_irq(s->bus); #ifdef DEBUG_IDE_ATAPI printf("status=0x%x\n", s->status); @@ -188,7 +188,6 @@ void ide_atapi_cmd_reply_end(IDEState *s) if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); if (ret < 0) { - ide_transfer_stop(s); ide_atapi_io_error(s, ret); return; } From 6698c5bed290f3852c9f200d197b5d99211bc3cd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Sep 2014 12:08:49 +0200 Subject: [PATCH 03/59] aio-win32: fix uninitialized use of have_select_revents Always initialize it with the return value of aio_prepare. Reported-by: TeLeMan Signed-off-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi Reviewed-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- aio-win32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aio-win32.c b/aio-win32.c index 61e3d2ddfe..7daeae18f1 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -283,9 +283,9 @@ bool aio_poll(AioContext *ctx, bool blocking) int count; int timeout; - if (aio_prepare(ctx)) { + have_select_revents = aio_prepare(ctx); + if (have_select_revents) { blocking = false; - have_select_revents = true; } was_dispatching = ctx->dispatching; From 0d910cfeaf2076b116b4517166d5deb0fea76394 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:07 +0800 Subject: [PATCH 04/59] ide/ahci: Check for -ECANCELED in aio callbacks Before, bdrv_aio_cancel will either complete the request (like normal) and call CB with an actual return code, or skip calling the request (for example when the IO req is not submitted by thread pool yet). We will change bdrv_aio_cancel to do it differently: always call CB before return, with either [1] a normal req completion ret code, or [2] ret == -ECANCELED. So the callers' callback must accept both cases. The existing logic works with case [1], but not [2]. The simplest transition of callback code is do nothing in case [2], just as if the CB is not called by the bdrv_aio_cancel() call. Suggested-by: Paolo Bonzini Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- hw/ide/ahci.c | 3 +++ hw/ide/core.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index ba69de30e0..9e1a265127 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -791,6 +791,9 @@ static void ncq_cb(void *opaque, int ret) NCQTransferState *ncq_tfs = (NCQTransferState *)opaque; IDEState *ide_state = &ncq_tfs->drive->port.ifs[0]; + if (ret == -ECANCELED) { + return; + } /* Clear bit for this tag in SActive */ ncq_tfs->drive->port_regs.scr_act &= ~(1 << ncq_tfs->tag); diff --git a/hw/ide/core.c b/hw/ide/core.c index 6fba056783..f15c026a26 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -568,6 +568,9 @@ static void ide_sector_read_cb(void *opaque, int ret) s->pio_aiocb = NULL; s->status &= ~BUSY_STAT; + if (ret == -ECANCELED) { + return; + } block_acct_done(bdrv_get_stats(s->bs), &s->acct); if (ret != 0) { if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO | @@ -678,6 +681,9 @@ void ide_dma_cb(void *opaque, int ret) int64_t sector_num; bool stay_active = false; + if (ret == -ECANCELED) { + return; + } if (ret < 0) { int op = IDE_RETRY_DMA; @@ -803,6 +809,9 @@ static void ide_sector_write_cb(void *opaque, int ret) IDEState *s = opaque; int n; + if (ret == -ECANCELED) { + return; + } block_acct_done(bdrv_get_stats(s->bs), &s->acct); s->pio_aiocb = NULL; @@ -882,6 +891,9 @@ static void ide_flush_cb(void *opaque, int ret) s->pio_aiocb = NULL; + if (ret == -ECANCELED) { + return; + } if (ret < 0) { /* XXX: What sector number to set here? */ if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) { From f197fe2b2c77259b6570620f288d905bfa38e2da Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:08 +0800 Subject: [PATCH 05/59] block: Add refcnt in BlockDriverAIOCB This will be useful in synchronous cancel emulation with bdrv_aio_cancel_async. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 12 +++++++++++- include/block/aio.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index bcd952a4cb..7f73ff0af1 100644 --- a/block.c +++ b/block.c @@ -4891,13 +4891,23 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, acb->bs = bs; acb->cb = cb; acb->opaque = opaque; + acb->refcnt = 1; return acb; } +void qemu_aio_ref(void *p) +{ + BlockDriverAIOCB *acb = p; + acb->refcnt++; +} + void qemu_aio_release(void *p) { BlockDriverAIOCB *acb = p; - g_slice_free1(acb->aiocb_info->aiocb_size, acb); + assert(acb->refcnt > 0); + if (--acb->refcnt == 0) { + g_slice_free1(acb->aiocb_info->aiocb_size, acb); + } } /**************************************************************/ diff --git a/include/block/aio.h b/include/block/aio.h index 4603c0f066..2626fc745f 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -35,11 +35,13 @@ struct BlockDriverAIOCB { BlockDriverState *bs; BlockDriverCompletionFunc *cb; void *opaque; + int refcnt; }; void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); void qemu_aio_release(void *p); +void qemu_aio_ref(void *p); typedef struct AioHandler AioHandler; typedef void QEMUBHFunc(void *opaque); From 02c50efe08736116048d5fc355043080f4d5859c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:09 +0800 Subject: [PATCH 06/59] block: Add bdrv_aio_cancel_async This is the async version of bdrv_aio_cancel, which doesn't block the caller. It guarantees that the cb is called either before returning or some time later. bdrv_aio_cancel can base on bdrv_aio_cancel_async, later we can convert all .io_cancel implementations to .io_cancel_async, and the aio_poll is the common logic. In the end, .io_cancel can be dropped. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 27 ++++++++++++++++++++++++++- include/block/aio.h | 2 ++ include/block/block.h | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 7f73ff0af1..a9a48df6d9 100644 --- a/block.c +++ b/block.c @@ -4640,7 +4640,32 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) void bdrv_aio_cancel(BlockDriverAIOCB *acb) { - acb->aiocb_info->cancel(acb); + if (acb->aiocb_info->cancel) { + acb->aiocb_info->cancel(acb); + } else { + qemu_aio_ref(acb); + bdrv_aio_cancel_async(acb); + while (acb->refcnt > 1) { + if (acb->aiocb_info->get_aio_context) { + aio_poll(acb->aiocb_info->get_aio_context(acb), true); + } else if (acb->bs) { + aio_poll(bdrv_get_aio_context(acb->bs), true); + } else { + abort(); + } + } + qemu_aio_release(acb); + } +} + +/* Async version of aio cancel. The caller is not blocked if the acb implements + * cancel_async, otherwise we do nothing and let the request normally complete. + * In either case the completion callback must be called. */ +void bdrv_aio_cancel_async(BlockDriverAIOCB *acb) +{ + if (acb->aiocb_info->cancel_async) { + acb->aiocb_info->cancel_async(acb); + } } /**************************************************************/ diff --git a/include/block/aio.h b/include/block/aio.h index 2626fc745f..ad361e34c7 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -27,6 +27,8 @@ typedef void BlockDriverCompletionFunc(void *opaque, int ret); typedef struct AIOCBInfo { void (*cancel)(BlockDriverAIOCB *acb); + void (*cancel_async)(BlockDriverAIOCB *acb); + AioContext *(*get_aio_context)(BlockDriverAIOCB *acb); size_t aiocb_size; } AIOCBInfo; diff --git a/include/block/block.h b/include/block/block.h index 07d6d8e67e..3318f0dfbe 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -337,6 +337,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); +void bdrv_aio_cancel_async(BlockDriverAIOCB *acb); typedef struct BlockRequest { /* Fields to be filled by multiwrite caller */ From 3acabd685e2d2b153044e5bf472343e7f4c8177b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:10 +0800 Subject: [PATCH 07/59] block: Drop bdrv_em_co_aiocb_info.cancel Also drop the now unused ->done pointer. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/block.c b/block.c index a9a48df6d9..1ce7b99082 100644 --- a/block.c +++ b/block.c @@ -4763,22 +4763,8 @@ typedef struct BlockDriverAIOCBCoroutine { QEMUBH* bh; } BlockDriverAIOCBCoroutine; -static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) -{ - AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); - BlockDriverAIOCBCoroutine *acb = - container_of(blockacb, BlockDriverAIOCBCoroutine, common); - bool done = false; - - acb->done = &done; - while (!done) { - aio_poll(aio_context, true); - } -} - static const AIOCBInfo bdrv_em_co_aiocb_info = { .aiocb_size = sizeof(BlockDriverAIOCBCoroutine), - .cancel = bdrv_aio_co_cancel_em, }; static void bdrv_co_em_bh(void *opaque) @@ -4787,10 +4773,6 @@ static void bdrv_co_em_bh(void *opaque) acb->common.cb(acb->common.opaque, acb->req.error); - if (acb->done) { - *acb->done = true; - } - qemu_bh_delete(acb->bh); qemu_aio_release(acb); } @@ -4831,7 +4813,6 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, acb->req.qiov = qiov; acb->req.flags = flags; acb->is_write = is_write; - acb->done = NULL; co = qemu_coroutine_create(bdrv_co_do_rw); qemu_coroutine_enter(co, acb); @@ -4858,7 +4839,6 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverAIOCBCoroutine *acb; acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); - acb->done = NULL; co = qemu_coroutine_create(bdrv_aio_flush_co_entry); qemu_coroutine_enter(co, acb); @@ -4888,7 +4868,6 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->req.sector = sector_num; acb->req.nb_sectors = nb_sectors; - acb->done = NULL; co = qemu_coroutine_create(bdrv_aio_discard_co_entry); qemu_coroutine_enter(co, acb); From f600ac19027e67dd73f1adad50a67a2cdb4f3218 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:11 +0800 Subject: [PATCH 08/59] block: Drop bdrv_em_aiocb_info.cancel Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block.c b/block.c index 1ce7b99082..45d3a5ba88 100644 --- a/block.c +++ b/block.c @@ -4681,18 +4681,8 @@ typedef struct BlockDriverAIOCBSync { int is_write; } BlockDriverAIOCBSync; -static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) -{ - BlockDriverAIOCBSync *acb = - container_of(blockacb, BlockDriverAIOCBSync, common); - qemu_bh_delete(acb->bh); - acb->bh = NULL; - qemu_aio_release(acb); -} - static const AIOCBInfo bdrv_em_aiocb_info = { .aiocb_size = sizeof(BlockDriverAIOCBSync), - .cancel = bdrv_aio_cancel_em, }; static void bdrv_aio_bh_cb(void *opaque) From 3391f5e51c4b55bf72fd33d9bc542a49114e48cd Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:12 +0800 Subject: [PATCH 09/59] thread-pool: Convert thread_pool_aiocb_info.cancel to cancel_async The .cancel_async shares the same the first half with .cancel: try to steal the request if not submitted yet. In this case set the elem to THREAD_DONE status and ret to -ECANCELED, which means thread_pool_completion_bh will call the cb with -ECANCELED. If the request is already submitted, do nothing, as we know the normal completion will happen in the future. Testing code update: Before, done_cb is only called if the request is already submitted by thread pool. Now done_cb is always called, even before it is submitted, because we emulate bdrv_aio_cancel with bdrv_aio_cancel_async. So also update the test criteria accordingly. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- tests/test-thread-pool.c | 34 ++++++++++++++++++++++++++-------- thread-pool.c | 32 ++++++++++++++------------------ 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c index f40b7fc170..ed2b25b8eb 100644 --- a/tests/test-thread-pool.c +++ b/tests/test-thread-pool.c @@ -33,7 +33,7 @@ static int long_cb(void *opaque) static void done_cb(void *opaque, int ret) { WorkerTestData *data = opaque; - g_assert_cmpint(data->ret, ==, -EINPROGRESS); + g_assert(data->ret == -EINPROGRESS || data->ret == -ECANCELED); data->ret = ret; data->aiocb = NULL; @@ -132,7 +132,7 @@ static void test_submit_many(void) } } -static void test_cancel(void) +static void do_test_cancel(bool sync) { WorkerTestData data[100]; int num_canceled; @@ -170,18 +170,25 @@ static void test_cancel(void) for (i = 0; i < 100; i++) { if (atomic_cmpxchg(&data[i].n, 0, 3) == 0) { data[i].ret = -ECANCELED; - bdrv_aio_cancel(data[i].aiocb); - active--; + if (sync) { + bdrv_aio_cancel(data[i].aiocb); + } else { + bdrv_aio_cancel_async(data[i].aiocb); + } num_canceled++; } } g_assert_cmpint(active, >, 0); g_assert_cmpint(num_canceled, <, 100); - /* Canceling the others will be a blocking operation. */ for (i = 0; i < 100; i++) { if (data[i].aiocb && data[i].n != 3) { - bdrv_aio_cancel(data[i].aiocb); + if (sync) { + /* Canceling the others will be a blocking operation. */ + bdrv_aio_cancel(data[i].aiocb); + } else { + bdrv_aio_cancel_async(data[i].aiocb); + } } } @@ -193,15 +200,25 @@ static void test_cancel(void) for (i = 0; i < 100; i++) { if (data[i].n == 3) { g_assert_cmpint(data[i].ret, ==, -ECANCELED); - g_assert(data[i].aiocb != NULL); + g_assert(data[i].aiocb == NULL); } else { g_assert_cmpint(data[i].n, ==, 2); - g_assert_cmpint(data[i].ret, ==, 0); + g_assert(data[i].ret == 0 || data[i].ret == -ECANCELED); g_assert(data[i].aiocb == NULL); } } } +static void test_cancel(void) +{ + do_test_cancel(true); +} + +static void test_cancel_async(void) +{ + do_test_cancel(false); +} + int main(int argc, char **argv) { int ret; @@ -217,6 +234,7 @@ int main(int argc, char **argv) g_test_add_func("/thread-pool/submit-co", test_submit_co); g_test_add_func("/thread-pool/submit-many", test_submit_many); g_test_add_func("/thread-pool/cancel", test_cancel); + g_test_add_func("/thread-pool/cancel-async", test_cancel_async); ret = g_test_run(); diff --git a/thread-pool.c b/thread-pool.c index bc07d7a1c9..bb1395cff8 100644 --- a/thread-pool.c +++ b/thread-pool.c @@ -31,7 +31,6 @@ enum ThreadState { THREAD_QUEUED, THREAD_ACTIVE, THREAD_DONE, - THREAD_CANCELED, }; struct ThreadPoolElement { @@ -58,7 +57,6 @@ struct ThreadPool { AioContext *ctx; QEMUBH *completion_bh; QemuMutex lock; - QemuCond check_cancel; QemuCond worker_stopped; QemuSemaphore sem; int max_threads; @@ -73,7 +71,6 @@ struct ThreadPool { int idle_threads; int new_threads; /* backlog of threads we need to create */ int pending_threads; /* threads created but not running yet */ - int pending_cancellations; /* whether we need a cond_broadcast */ bool stopping; }; @@ -113,9 +110,6 @@ static void *worker_thread(void *opaque) req->state = THREAD_DONE; qemu_mutex_lock(&pool->lock); - if (pool->pending_cancellations) { - qemu_cond_broadcast(&pool->check_cancel); - } qemu_bh_schedule(pool->completion_bh); } @@ -173,7 +167,7 @@ static void thread_pool_completion_bh(void *opaque) restart: QLIST_FOREACH_SAFE(elem, &pool->head, all, next) { - if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { + if (elem->state != THREAD_DONE) { continue; } if (elem->state == THREAD_DONE) { @@ -217,22 +211,26 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb) */ qemu_sem_timedwait(&pool->sem, 0) == 0) { QTAILQ_REMOVE(&pool->request_list, elem, reqs); - elem->state = THREAD_CANCELED; qemu_bh_schedule(pool->completion_bh); - } else { - pool->pending_cancellations++; - while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { - qemu_cond_wait(&pool->check_cancel, &pool->lock); - } - pool->pending_cancellations--; + + elem->state = THREAD_DONE; + elem->ret = -ECANCELED; } + qemu_mutex_unlock(&pool->lock); - thread_pool_completion_bh(pool); +} + +static AioContext *thread_pool_get_aio_context(BlockDriverAIOCB *acb) +{ + ThreadPoolElement *elem = (ThreadPoolElement *)acb; + ThreadPool *pool = elem->pool; + return pool->ctx; } static const AIOCBInfo thread_pool_aiocb_info = { .aiocb_size = sizeof(ThreadPoolElement), - .cancel = thread_pool_cancel, + .cancel_async = thread_pool_cancel, + .get_aio_context = thread_pool_get_aio_context, }; BlockDriverAIOCB *thread_pool_submit_aio(ThreadPool *pool, @@ -299,7 +297,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) pool->ctx = ctx; pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool); qemu_mutex_init(&pool->lock); - qemu_cond_init(&pool->check_cancel); qemu_cond_init(&pool->worker_stopped); qemu_sem_init(&pool->sem, 0); pool->max_threads = 64; @@ -342,7 +339,6 @@ void thread_pool_free(ThreadPool *pool) qemu_bh_delete(pool->completion_bh); qemu_sem_destroy(&pool->sem); - qemu_cond_destroy(&pool->check_cancel); qemu_cond_destroy(&pool->worker_stopped); qemu_mutex_destroy(&pool->lock); g_free(pool); From 771b64daf9c73be98d223d3ab101a61f02cac277 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:13 +0800 Subject: [PATCH 10/59] linux-aio: Convert laio_aiocb_info.cancel to .cancel_async Just call io_cancel (2), if it fails, it means the request is not canceled, so the event loop will eventually call qemu_laio_process_completion. In qemu_laio_process_completion, change to call the cb unconditionally. It is required by bdrv_aio_cancel_async. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/linux-aio.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/block/linux-aio.c b/block/linux-aio.c index 9aca758b10..b67f56cda3 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -85,9 +85,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, ret = -EINVAL; } } - - laiocb->common.cb(laiocb->common.opaque, ret); } + laiocb->common.cb(laiocb->common.opaque, ret); qemu_aio_release(laiocb); } @@ -153,35 +152,22 @@ static void laio_cancel(BlockDriverAIOCB *blockacb) struct io_event event; int ret; - if (laiocb->ret != -EINPROGRESS) + if (laiocb->ret != -EINPROGRESS) { return; - - /* - * Note that as of Linux 2.6.31 neither the block device code nor any - * filesystem implements cancellation of AIO request. - * Thus the polling loop below is the normal code path. - */ + } ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); - if (ret == 0) { - laiocb->ret = -ECANCELED; + laiocb->ret = -ECANCELED; + if (ret != 0) { + /* iocb is not cancelled, cb will be called by the event loop later */ return; } - /* - * We have to wait for the iocb to finish. - * - * The only way to get the iocb status update is by polling the io context. - * We might be able to do this slightly more optimal by removing the - * O_NONBLOCK flag. - */ - while (laiocb->ret == -EINPROGRESS) { - qemu_laio_completion_cb(&laiocb->ctx->e); - } + laiocb->common.cb(laiocb->common.opaque, laiocb->ret); } static const AIOCBInfo laio_aiocb_info = { .aiocb_size = sizeof(struct qemu_laiocb), - .cancel = laio_cancel, + .cancel_async = laio_cancel, }; static void ioq_init(LaioQueue *io_q) From 9bb9da46d6639ded9406e4d8902033c9d97b0006 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:14 +0800 Subject: [PATCH 11/59] dma: Convert dma_aiocb_info.cancel to .cancel_async Just forward the request to bdrv_aio_cancel_async. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- dma-helpers.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/dma-helpers.c b/dma-helpers.c index f6811fa86b..207b21ef6f 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -73,7 +73,6 @@ typedef struct { QEMUSGList *sg; uint64_t sector_num; DMADirection dir; - bool in_cancel; int sg_cur_index; dma_addr_t sg_cur_byte; QEMUIOVector iov; @@ -125,12 +124,7 @@ static void dma_complete(DMAAIOCB *dbs, int ret) qemu_bh_delete(dbs->bh); dbs->bh = NULL; } - if (!dbs->in_cancel) { - /* Requests may complete while dma_aio_cancel is in progress. In - * this case, the AIOCB should not be released because it is still - * referenced by dma_aio_cancel. */ - qemu_aio_release(dbs); - } + qemu_aio_release(dbs); } static void dma_bdrv_cb(void *opaque, int ret) @@ -186,19 +180,14 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb) trace_dma_aio_cancel(dbs); if (dbs->acb) { - BlockDriverAIOCB *acb = dbs->acb; - dbs->acb = NULL; - dbs->in_cancel = true; - bdrv_aio_cancel(acb); - dbs->in_cancel = false; + bdrv_aio_cancel_async(dbs->acb); } - dbs->common.cb = NULL; - dma_complete(dbs, 0); } + static const AIOCBInfo dma_aiocb_info = { .aiocb_size = sizeof(DMAAIOCB), - .cancel = dma_aio_cancel, + .cancel_async = dma_aio_cancel, }; BlockDriverAIOCB *dma_bdrv_io( @@ -217,7 +206,6 @@ BlockDriverAIOCB *dma_bdrv_io( dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; dbs->dir = dir; - dbs->in_cancel = false; dbs->io_func = io_func; dbs->bh = NULL; qemu_iovec_init(&dbs->iov, sg->nsg); From 722d93335f7cda3065efc17763fd44dd6b244ed1 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:15 +0800 Subject: [PATCH 12/59] iscsi: Convert iscsi_aiocb_info.cancel to .cancel_async Also drop the unused field "canceled". Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/iscsi.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 84bcae89fa..f8328d64eb 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -88,7 +88,6 @@ typedef struct IscsiAIOCB { struct scsi_task *task; uint8_t *buf; int status; - int canceled; int64_t sector_num; int nb_sectors; #ifdef __linux__ @@ -120,9 +119,7 @@ iscsi_bh_cb(void *p) g_free(acb->buf); acb->buf = NULL; - if (acb->canceled == 0) { - acb->common.cb(acb->common.opaque, acb->status); - } + acb->common.cb(acb->common.opaque, acb->status); if (acb->task != NULL) { scsi_free_scsi_task(acb->task); @@ -240,20 +237,15 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb) return; } - acb->canceled = 1; - /* send a task mgmt call to the target to cancel the task on the target */ iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task, iscsi_abort_task_cb, acb); - while (acb->status == -EINPROGRESS) { - aio_poll(iscsilun->aio_context, true); - } } static const AIOCBInfo iscsi_aiocb_info = { .aiocb_size = sizeof(IscsiAIOCB), - .cancel = iscsi_aio_cancel, + .cancel_async = iscsi_aio_cancel, }; @@ -638,10 +630,6 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, g_free(acb->buf); acb->buf = NULL; - if (acb->canceled != 0) { - return; - } - acb->status = 0; if (status < 0) { error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s", @@ -683,7 +671,6 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); acb->iscsilun = iscsilun; - acb->canceled = 0; acb->bh = NULL; acb->status = -EINPROGRESS; acb->buf = NULL; From 4743b42a1b1947d76ad21ba7119c5638fa28c798 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:16 +0800 Subject: [PATCH 13/59] archipelago: Drop archipelago_aiocb_info.cancel The cancelled flag is no longer useful. Later the request will complete as before, and cb will be called. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/archipelago.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/block/archipelago.c b/block/archipelago.c index 93fb7c0634..435efa79c8 100644 --- a/block/archipelago.c +++ b/block/archipelago.c @@ -91,7 +91,6 @@ typedef struct ArchipelagoAIOCB { struct BDRVArchipelagoState *s; QEMUIOVector *qiov; ARCHIPCmd cmd; - bool cancelled; int status; int64_t size; int64_t ret; @@ -318,9 +317,7 @@ static void qemu_archipelago_complete_aio(void *opaque) aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret); aio_cb->status = 0; - if (!aio_cb->cancelled) { - qemu_aio_release(aio_cb); - } + qemu_aio_release(aio_cb); g_free(reqdata); } @@ -725,19 +722,8 @@ static int qemu_archipelago_create(const char *filename, return ret; } -static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb) -{ - ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb; - aio_cb->cancelled = true; - while (aio_cb->status == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true); - } - qemu_aio_release(aio_cb); -} - static const AIOCBInfo archipelago_aiocb_info = { .aiocb_size = sizeof(ArchipelagoAIOCB), - .cancel = qemu_archipelago_aio_cancel, }; static int archipelago_submit_request(BDRVArchipelagoState *s, @@ -889,7 +875,6 @@ static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs, aio_cb->ret = 0; aio_cb->s = s; - aio_cb->cancelled = false; aio_cb->status = -EINPROGRESS; off = sector_num * BDRV_SECTOR_SIZE; From 4c781717c53d606bfa666d606c2875f9952a3155 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:17 +0800 Subject: [PATCH 14/59] blkdebug: Drop blkdebug_aiocb_info.cancel Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/blkdebug.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 69b330eced..08131b3a27 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -52,11 +52,8 @@ typedef struct BlkdebugSuspendedReq { QLIST_ENTRY(BlkdebugSuspendedReq) next; } BlkdebugSuspendedReq; -static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb); - static const AIOCBInfo blkdebug_aiocb_info = { - .aiocb_size = sizeof(BlkdebugAIOCB), - .cancel = blkdebug_aio_cancel, + .aiocb_size = sizeof(BlkdebugAIOCB), }; enum { @@ -450,16 +447,6 @@ static void error_callback_bh(void *opaque) qemu_aio_release(acb); } -static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb) -{ - BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common); - if (acb->bh) { - qemu_bh_delete(acb->bh); - acb->bh = NULL; - } - qemu_aio_release(acb); -} - static BlockDriverAIOCB *inject_error(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque, BlkdebugRule *rule) { From 9784e70f3d6b9805b5291f71f2543e346fe08433 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:18 +0800 Subject: [PATCH 15/59] blkverify: Drop blkverify_aiocb_info.cancel Also the finished pointer is not used any more. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/blkverify.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/block/blkverify.c b/block/blkverify.c index 163064cf6b..460393f8d0 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -29,7 +29,6 @@ struct BlkverifyAIOCB { int ret; /* first completed request's result */ unsigned int done; /* completion counter */ - bool *finished; /* completion signal for cancel */ QEMUIOVector *qiov; /* user I/O vector */ QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */ @@ -38,22 +37,8 @@ struct BlkverifyAIOCB { void (*verify)(BlkverifyAIOCB *acb); }; -static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb) -{ - BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb; - AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); - bool finished = false; - - /* Wait until request completes, invokes its callback, and frees itself */ - acb->finished = &finished; - while (!finished) { - aio_poll(aio_context, true); - } -} - static const AIOCBInfo blkverify_aiocb_info = { .aiocb_size = sizeof(BlkverifyAIOCB), - .cancel = blkverify_aio_cancel, }; static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb, @@ -194,7 +179,6 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, acb->qiov = qiov; acb->buf = NULL; acb->verify = NULL; - acb->finished = NULL; return acb; } @@ -208,9 +192,6 @@ static void blkverify_aio_bh(void *opaque) qemu_vfree(acb->buf); } acb->common.cb(acb->common.opaque, acb->ret); - if (acb->finished) { - *acb->finished = true; - } qemu_aio_release(acb); } From facb5539d6662e8208fc9a763719a5367d4e65d4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:19 +0800 Subject: [PATCH 16/59] curl: Drop curl_aiocb_info.cancel Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/curl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/block/curl.c b/block/curl.c index 938f9d94e8..6f5d6aebb2 100644 --- a/block/curl.c +++ b/block/curl.c @@ -613,14 +613,8 @@ out_noclean: return -EINVAL; } -static void curl_aio_cancel(BlockDriverAIOCB *blockacb) -{ - // Do we have to implement canceling? Seems to work without... -} - static const AIOCBInfo curl_aiocb_info = { .aiocb_size = sizeof(CURLAIOCB), - .cancel = curl_aio_cancel, }; From 533ffb17a5b023fe531477732b71d97267012863 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:20 +0800 Subject: [PATCH 17/59] qed: Drop qed_aiocb_info.cancel Also drop the now unused ->finished field. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/qed.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/block/qed.c b/block/qed.c index f8d9e12263..7a15d446c5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -18,22 +18,8 @@ #include "qapi/qmp/qerror.h" #include "migration/migration.h" -static void qed_aio_cancel(BlockDriverAIOCB *blockacb) -{ - QEDAIOCB *acb = (QEDAIOCB *)blockacb; - AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); - bool finished = false; - - /* Wait for the request to finish */ - acb->finished = &finished; - while (!finished) { - aio_poll(aio_context, true); - } -} - static const AIOCBInfo qed_aiocb_info = { .aiocb_size = sizeof(QEDAIOCB), - .cancel = qed_aio_cancel, }; static int bdrv_qed_probe(const uint8_t *buf, int buf_size, @@ -919,18 +905,12 @@ static void qed_aio_complete_bh(void *opaque) BlockDriverCompletionFunc *cb = acb->common.cb; void *user_opaque = acb->common.opaque; int ret = acb->bh_ret; - bool *finished = acb->finished; qemu_bh_delete(acb->bh); qemu_aio_release(acb); /* Invoke callback */ cb(user_opaque, ret); - - /* Signal cancel completion */ - if (finished) { - *finished = true; - } } static void qed_aio_complete(QEDAIOCB *acb, int ret) @@ -1397,7 +1377,6 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, opaque, flags); acb->flags = flags; - acb->finished = NULL; acb->qiov = qiov; acb->qiov_offset = 0; acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; From 997dd8df3e95b2fdbd1f30b3deefaad4e9efd14a Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Thu, 11 Sep 2014 13:41:21 +0800 Subject: [PATCH 18/59] quorum: fix quorum_aio_cancel() For a fifo read pattern, we only have one running aio (possible other cases that has less number than num_children in the future), so we need to check if .acb is NULL against bdrv_aio_cancel() to avoid segfault. Cc: Eric Blake Cc: Benoit Canet Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/quorum.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/quorum.c b/block/quorum.c index 093382e8f5..41c4249547 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -138,7 +138,9 @@ static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) /* cancel all callbacks */ for (i = 0; i < s->num_children; i++) { - bdrv_aio_cancel(acb->qcrs[i].aiocb); + if (acb->qcrs[i].aiocb) { + bdrv_aio_cancel(acb->qcrs[i].aiocb); + } } g_free(acb->qcrs); From 7940e5056bfdb5d2861774e294d5459952cd0aee Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:22 +0800 Subject: [PATCH 19/59] quorum: Convert quorum_aiocb_info.cancel to .cancel_async Before, we cancel all the child requests with bdrv_aio_cancel, then free the acb.. Now we just kick off asynchronous cancellation of child requests and return, we know quorum_aio_cb will be called later, so in the end quorum_aio_finalize will take care of calling the caller's cb. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/quorum.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 41c4249547..f343c04c53 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -139,17 +139,14 @@ static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) /* cancel all callbacks */ for (i = 0; i < s->num_children; i++) { if (acb->qcrs[i].aiocb) { - bdrv_aio_cancel(acb->qcrs[i].aiocb); + bdrv_aio_cancel_async(acb->qcrs[i].aiocb); } } - - g_free(acb->qcrs); - qemu_aio_release(acb); } static AIOCBInfo quorum_aiocb_info = { .aiocb_size = sizeof(QuorumAIOCB), - .cancel = quorum_aio_cancel, + .cancel_async = quorum_aio_cancel, }; static void quorum_aio_finalize(QuorumAIOCB *acb) From 7691e24dbebb46658e89b3f950fda6ec78bbb823 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:23 +0800 Subject: [PATCH 20/59] rbd: Drop rbd_aiocb_info.cancel And also drop the now unused "cancelled" field. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/rbd.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index b7f7d5ff30..e5341fc3eb 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -77,7 +77,6 @@ typedef struct RBDAIOCB { int64_t sector_num; int error; struct BDRVRBDState *s; - int cancelled; int status; } RBDAIOCB; @@ -408,9 +407,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); acb->status = 0; - if (!acb->cancelled) { - qemu_aio_release(acb); - } + qemu_aio_release(acb); } /* TODO Convert to fine grained options */ @@ -539,25 +536,8 @@ static void qemu_rbd_close(BlockDriverState *bs) rados_shutdown(s->cluster); } -/* - * Cancel aio. Since we don't reference acb in a non qemu threads, - * it is safe to access it here. - */ -static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) -{ - RBDAIOCB *acb = (RBDAIOCB *) blockacb; - acb->cancelled = 1; - - while (acb->status == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(acb->common.bs), true); - } - - qemu_aio_release(acb); -} - static const AIOCBInfo rbd_aiocb_info = { .aiocb_size = sizeof(RBDAIOCB), - .cancel = qemu_rbd_aio_cancel, }; static void rbd_finish_bh(void *opaque) @@ -640,7 +620,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->ret = 0; acb->error = 0; acb->s = s; - acb->cancelled = 0; acb->bh = NULL; acb->status = -EINPROGRESS; From 6d24b4df38b0e1b2a6cdeeaa4a0e3dcdee643364 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:24 +0800 Subject: [PATCH 21/59] sheepdog: Convert sd_aiocb_info.cancel to .cancel_async Also drop the now unused SheepdogAIOCB.finished field. Note that this aio is internal to sheepdog driver and has NULL cb and opaque, and should be unused at all. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 7da36e1f9a..f538606f87 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -315,7 +315,6 @@ struct SheepdogAIOCB { void (*aio_done_func)(SheepdogAIOCB *); bool cancelable; - bool *finished; int nr_pending; }; @@ -446,9 +445,6 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) { qemu_coroutine_enter(acb->coroutine, NULL); - if (acb->finished) { - *acb->finished = true; - } qemu_aio_release(acb); } @@ -482,36 +478,33 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb) SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; BDRVSheepdogState *s = acb->common.bs->opaque; AIOReq *aioreq, *next; - bool finished = false; - acb->finished = &finished; - while (!finished) { - if (sd_acb_cancelable(acb)) { - /* Remove outstanding requests from pending and failed queues. */ - QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings, - next) { - if (aioreq->aiocb == acb) { - free_aio_req(s, aioreq); - } + if (sd_acb_cancelable(acb)) { + /* Remove outstanding requests from pending and failed queues. */ + QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings, + next) { + if (aioreq->aiocb == acb) { + free_aio_req(s, aioreq); } - QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, - next) { - if (aioreq->aiocb == acb) { - free_aio_req(s, aioreq); - } - } - - assert(acb->nr_pending == 0); - sd_finish_aiocb(acb); - return; } - aio_poll(s->aio_context, true); + QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, + next) { + if (aioreq->aiocb == acb) { + free_aio_req(s, aioreq); + } + } + + assert(acb->nr_pending == 0); + if (acb->common.cb) { + acb->common.cb(acb->common.opaque, -ECANCELED); + } + sd_finish_aiocb(acb); } } static const AIOCBInfo sd_aiocb_info = { - .aiocb_size = sizeof(SheepdogAIOCB), - .cancel = sd_aio_cancel, + .aiocb_size = sizeof(SheepdogAIOCB), + .cancel_async = sd_aio_cancel, }; static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, @@ -528,7 +521,6 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->aio_done_func = NULL; acb->cancelable = true; - acb->finished = NULL; acb->coroutine = qemu_coroutine_self(); acb->ret = 0; acb->nr_pending = 0; From 5da91e4ef46fe85694c5c43a8f3a186a7a83cda7 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:25 +0800 Subject: [PATCH 22/59] win32-aio: Drop win32_aiocb_info.cancel Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block/win32-aio.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/block/win32-aio.c b/block/win32-aio.c index 5030e3274d..eed86f7e92 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -106,22 +106,8 @@ static void win32_aio_completion_cb(EventNotifier *e) } } -static void win32_aio_cancel(BlockDriverAIOCB *blockacb) -{ - QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb; - - /* - * CancelIoEx is only supported in Vista and newer. For now, just - * wait for completion. - */ - while (!HasOverlappedIoCompleted(&waiocb->ov)) { - aio_poll(bdrv_get_aio_context(blockacb->bs), true); - } -} - static const AIOCBInfo win32_aiocb_info = { .aiocb_size = sizeof(QEMUWin32AIOCB), - .cancel = win32_aio_cancel, }; BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, From e551c999bcca1f29742741033853651f6ea88479 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:26 +0800 Subject: [PATCH 23/59] ide: Convert trim_aiocb_info.cancel to .cancel_async We know that either bh is scheduled or ide_issue_trim_cb will be called again, so we just set i, j and ret to the right values. In both cases, ide_trim_bh_cb will be called. Also forward the cancellation to the iocb->aiocb which we get from bdrv_aio_discard. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- hw/ide/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index f15c026a26..055d150c41 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -372,23 +372,21 @@ static void trim_aio_cancel(BlockDriverAIOCB *acb) { TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common); - /* Exit the loop in case bdrv_aio_cancel calls ide_issue_trim_cb again. */ + /* Exit the loop so ide_issue_trim_cb will not continue */ iocb->j = iocb->qiov->niov - 1; iocb->i = (iocb->qiov->iov[iocb->j].iov_len / 8) - 1; - /* Tell ide_issue_trim_cb not to trigger the completion, too. */ - qemu_bh_delete(iocb->bh); - iocb->bh = NULL; + iocb->ret = -ECANCELED; if (iocb->aiocb) { - bdrv_aio_cancel(iocb->aiocb); + bdrv_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; } - qemu_aio_release(iocb); } static const AIOCBInfo trim_aiocb_info = { .aiocb_size = sizeof(TrimAIOCB), - .cancel = trim_aio_cancel, + .cancel_async = trim_aio_cancel, }; static void ide_trim_bh_cb(void *opaque) From ca5fd113b8ae5898853a757e06cb8d8a0c5e5d85 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:27 +0800 Subject: [PATCH 24/59] block: Drop AIOCBInfo.cancel Now that all the implementations are converted to asynchronous version and we can emulate synchronous cancellation with it. Let's drop the unused member. Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 24 ++++++++++-------------- include/block/aio.h | 1 - 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index 45d3a5ba88..3a3648da97 100644 --- a/block.c +++ b/block.c @@ -4640,22 +4640,18 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) void bdrv_aio_cancel(BlockDriverAIOCB *acb) { - if (acb->aiocb_info->cancel) { - acb->aiocb_info->cancel(acb); - } else { - qemu_aio_ref(acb); - bdrv_aio_cancel_async(acb); - while (acb->refcnt > 1) { - if (acb->aiocb_info->get_aio_context) { - aio_poll(acb->aiocb_info->get_aio_context(acb), true); - } else if (acb->bs) { - aio_poll(bdrv_get_aio_context(acb->bs), true); - } else { - abort(); - } + qemu_aio_ref(acb); + bdrv_aio_cancel_async(acb); + while (acb->refcnt > 1) { + if (acb->aiocb_info->get_aio_context) { + aio_poll(acb->aiocb_info->get_aio_context(acb), true); + } else if (acb->bs) { + aio_poll(bdrv_get_aio_context(acb->bs), true); + } else { + abort(); } - qemu_aio_release(acb); } + qemu_aio_release(acb); } /* Async version of aio cancel. The caller is not blocked if the acb implements diff --git a/include/block/aio.h b/include/block/aio.h index ad361e34c7..f2d0582bd1 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -26,7 +26,6 @@ typedef struct BlockDriverAIOCB BlockDriverAIOCB; typedef void BlockDriverCompletionFunc(void *opaque, int ret); typedef struct AIOCBInfo { - void (*cancel)(BlockDriverAIOCB *acb); void (*cancel_async)(BlockDriverAIOCB *acb); AioContext *(*get_aio_context)(BlockDriverAIOCB *acb); size_t aiocb_size; From 8007429a99d6ea8480ba0a7a5fb5ae92473f798c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 13:41:28 +0800 Subject: [PATCH 25/59] block: Rename qemu_aio_release -> qemu_aio_unref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: BenoƮt Canet Signed-off-by: Fam Zheng Signed-off-by: Stefan Hajnoczi --- block.c | 8 ++++---- block/archipelago.c | 4 ++-- block/blkdebug.c | 2 +- block/blkverify.c | 2 +- block/curl.c | 10 +++++----- block/iscsi.c | 6 +++--- block/linux-aio.c | 4 ++-- block/qed.c | 2 +- block/quorum.c | 2 +- block/rbd.c | 4 ++-- block/sheepdog.c | 8 ++++---- block/win32-aio.c | 4 ++-- dma-helpers.c | 2 +- hw/ide/core.c | 2 +- include/block/aio.h | 2 +- thread-pool.c | 4 ++-- 16 files changed, 33 insertions(+), 33 deletions(-) diff --git a/block.c b/block.c index 3a3648da97..a857913fc2 100644 --- a/block.c +++ b/block.c @@ -4651,7 +4651,7 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) abort(); } } - qemu_aio_release(acb); + qemu_aio_unref(acb); } /* Async version of aio cancel. The caller is not blocked if the acb implements @@ -4692,7 +4692,7 @@ static void bdrv_aio_bh_cb(void *opaque) acb->common.cb(acb->common.opaque, acb->ret); qemu_bh_delete(acb->bh); acb->bh = NULL; - qemu_aio_release(acb); + qemu_aio_unref(acb); } static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, @@ -4760,7 +4760,7 @@ static void bdrv_co_em_bh(void *opaque) acb->common.cb(acb->common.opaque, acb->req.error); qemu_bh_delete(acb->bh); - qemu_aio_release(acb); + qemu_aio_unref(acb); } /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ @@ -4891,7 +4891,7 @@ void qemu_aio_ref(void *p) acb->refcnt++; } -void qemu_aio_release(void *p) +void qemu_aio_unref(void *p) { BlockDriverAIOCB *acb = p; assert(acb->refcnt > 0); diff --git a/block/archipelago.c b/block/archipelago.c index 435efa79c8..3c86d0ba5a 100644 --- a/block/archipelago.c +++ b/block/archipelago.c @@ -317,7 +317,7 @@ static void qemu_archipelago_complete_aio(void *opaque) aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret); aio_cb->status = 0; - qemu_aio_release(aio_cb); + qemu_aio_unref(aio_cb); g_free(reqdata); } @@ -890,7 +890,7 @@ static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs, err_exit: error_report("qemu_archipelago_aio_rw(): I/O Error\n"); - qemu_aio_release(aio_cb); + qemu_aio_unref(aio_cb); return NULL; } diff --git a/block/blkdebug.c b/block/blkdebug.c index 08131b3a27..ced0b600f9 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -444,7 +444,7 @@ static void error_callback_bh(void *opaque) struct BlkdebugAIOCB *acb = opaque; qemu_bh_delete(acb->bh); acb->common.cb(acb->common.opaque, acb->ret); - qemu_aio_release(acb); + qemu_aio_unref(acb); } static BlockDriverAIOCB *inject_error(BlockDriverState *bs, diff --git a/block/blkverify.c b/block/blkverify.c index 460393f8d0..7d64a23a09 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -192,7 +192,7 @@ static void blkverify_aio_bh(void *opaque) qemu_vfree(acb->buf); } acb->common.cb(acb->common.opaque, acb->ret); - qemu_aio_release(acb); + qemu_aio_unref(acb); } static void blkverify_aio_cb(void *opaque, int ret) diff --git a/block/curl.c b/block/curl.c index 6f5d6aebb2..225407c643 100644 --- a/block/curl.c +++ b/block/curl.c @@ -212,7 +212,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start, acb->end - acb->start); acb->common.cb(acb->common.opaque, 0); - qemu_aio_release(acb); + qemu_aio_unref(acb); s->acb[i] = NULL; } } @@ -304,7 +304,7 @@ static void curl_multi_check_completion(BDRVCURLState *s) } acb->common.cb(acb->common.opaque, -EIO); - qemu_aio_release(acb); + qemu_aio_unref(acb); state->acb[i] = NULL; } } @@ -636,7 +636,7 @@ static void curl_readv_bh_cb(void *p) // we can just call the callback and be done. switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) { case FIND_RET_OK: - qemu_aio_release(acb); + qemu_aio_unref(acb); // fall through case FIND_RET_WAIT: return; @@ -648,7 +648,7 @@ static void curl_readv_bh_cb(void *p) state = curl_init_state(acb->common.bs, s); if (!state) { acb->common.cb(acb->common.opaque, -EIO); - qemu_aio_release(acb); + qemu_aio_unref(acb); return; } @@ -664,7 +664,7 @@ static void curl_readv_bh_cb(void *p) if (state->buf_len && state->orig_buf == NULL) { curl_clean_state(state); acb->common.cb(acb->common.opaque, -ENOMEM); - qemu_aio_release(acb); + qemu_aio_unref(acb); return; } state->acb[0] = acb; diff --git a/block/iscsi.c b/block/iscsi.c index f8328d64eb..3c1b416704 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -126,7 +126,7 @@ iscsi_bh_cb(void *p) acb->task = NULL; } - qemu_aio_release(acb); + qemu_aio_unref(acb); } static void @@ -680,7 +680,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, if (acb->task == NULL) { error_report("iSCSI: Failed to allocate task for scsi command. %s", iscsi_get_error(iscsi)); - qemu_aio_release(acb); + qemu_aio_unref(acb); return NULL; } memset(acb->task, 0, sizeof(struct scsi_task)); @@ -718,7 +718,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, (data.size > 0) ? &data : NULL, acb) != 0) { scsi_free_scsi_task(acb->task); - qemu_aio_release(acb); + qemu_aio_unref(acb); return NULL; } diff --git a/block/linux-aio.c b/block/linux-aio.c index b67f56cda3..f4baba2be0 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -88,7 +88,7 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, } laiocb->common.cb(laiocb->common.opaque, ret); - qemu_aio_release(laiocb); + qemu_aio_unref(laiocb); } /* The completion BH fetches completed I/O requests and invokes their @@ -286,7 +286,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, return &laiocb->common; out_free_aiocb: - qemu_aio_release(laiocb); + qemu_aio_unref(laiocb); return NULL; } diff --git a/block/qed.c b/block/qed.c index 7a15d446c5..e2316d7ff0 100644 --- a/block/qed.c +++ b/block/qed.c @@ -907,7 +907,7 @@ static void qed_aio_complete_bh(void *opaque) int ret = acb->bh_ret; qemu_bh_delete(acb->bh); - qemu_aio_release(acb); + qemu_aio_unref(acb); /* Invoke callback */ cb(user_opaque, ret); diff --git a/block/quorum.c b/block/quorum.c index f343c04c53..7687466733 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -168,7 +168,7 @@ static void quorum_aio_finalize(QuorumAIOCB *acb) } g_free(acb->qcrs); - qemu_aio_release(acb); + qemu_aio_unref(acb); } static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) diff --git a/block/rbd.c b/block/rbd.c index e5341fc3eb..96947e328a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -407,7 +407,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); acb->status = 0; - qemu_aio_release(acb); + qemu_aio_unref(acb); } /* TODO Convert to fine grained options */ @@ -671,7 +671,7 @@ failed_completion: failed: g_free(rcb); qemu_vfree(acb->bounce); - qemu_aio_release(acb); + qemu_aio_unref(acb); return NULL; } diff --git a/block/sheepdog.c b/block/sheepdog.c index f538606f87..2d78ef91f7 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -445,7 +445,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) { qemu_coroutine_enter(acb->coroutine, NULL); - qemu_aio_release(acb); + qemu_aio_unref(acb); } /* @@ -2130,7 +2130,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, ret = sd_co_rw_vector(acb); if (ret <= 0) { - qemu_aio_release(acb); + qemu_aio_unref(acb); return ret; } @@ -2151,7 +2151,7 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, ret = sd_co_rw_vector(acb); if (ret <= 0) { - qemu_aio_release(acb); + qemu_aio_unref(acb); return ret; } @@ -2510,7 +2510,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, ret = sd_co_rw_vector(acb); if (ret <= 0) { - qemu_aio_release(acb); + qemu_aio_unref(acb); return ret; } diff --git a/block/win32-aio.c b/block/win32-aio.c index eed86f7e92..9323c1aed1 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -88,7 +88,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s, waiocb->common.cb(waiocb->common.opaque, ret); - qemu_aio_release(waiocb); + qemu_aio_unref(waiocb); } static void win32_aio_completion_cb(EventNotifier *e) @@ -158,7 +158,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, out_dec_count: aio->count--; out: - qemu_aio_release(waiocb); + qemu_aio_unref(waiocb); return NULL; } diff --git a/dma-helpers.c b/dma-helpers.c index 207b21ef6f..7f86e18e72 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -124,7 +124,7 @@ static void dma_complete(DMAAIOCB *dbs, int ret) qemu_bh_delete(dbs->bh); dbs->bh = NULL; } - qemu_aio_release(dbs); + qemu_aio_unref(dbs); } static void dma_bdrv_cb(void *opaque, int ret) diff --git a/hw/ide/core.c b/hw/ide/core.c index 055d150c41..190700ac74 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -397,7 +397,7 @@ static void ide_trim_bh_cb(void *opaque) qemu_bh_delete(iocb->bh); iocb->bh = NULL; - qemu_aio_release(iocb); + qemu_aio_unref(iocb); } static void ide_issue_trim_cb(void *opaque, int ret) diff --git a/include/block/aio.h b/include/block/aio.h index f2d0582bd1..67a75ddd56 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -41,7 +41,7 @@ struct BlockDriverAIOCB { void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); -void qemu_aio_release(void *p); +void qemu_aio_unref(void *p); void qemu_aio_ref(void *p); typedef struct AioHandler AioHandler; diff --git a/thread-pool.c b/thread-pool.c index bb1395cff8..86fa4a8f89 100644 --- a/thread-pool.c +++ b/thread-pool.c @@ -185,12 +185,12 @@ restart: qemu_bh_schedule(pool->completion_bh); elem->common.cb(elem->common.opaque, elem->ret); - qemu_aio_release(elem); + qemu_aio_unref(elem); goto restart; } else { /* remove the request */ QLIST_REMOVE(elem, all); - qemu_aio_release(elem); + qemu_aio_unref(elem); } } } From 0722eba9450cb8be9713fec1caa0772330739586 Mon Sep 17 00:00:00 2001 From: Gonglei Date: Tue, 16 Sep 2014 10:19:33 +0800 Subject: [PATCH 26/59] qdev-monitor: fix segmentation fault on qdev_device_help() Normally, qmp_device_list_properties() may return NULL when a device haven't special properties excpet Object and DeviceState properties, such as virtio-balloon-device. We just need check local_err instead of prop_list. Example: Segmentation fault (core dumped) The backtrace as below: Program received signal SIGSEGV, Segmentation fault. 0x00005555559af1a8 in error_get_pretty (err=0x0) at util/error.c:152 152 return err->msg; (gdb) bt func=0x55555574a6ca , opaque=0x0, abort_on_failure=0) at util/qemu-option.c:1072 Signed-off-by: Gonglei Reviewed-by: Markus Armbruster Signed-off-by: Stefan Hajnoczi --- qdev-monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qdev-monitor.c b/qdev-monitor.c index fb9ee24b3a..5ec66067f5 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -206,7 +206,7 @@ int qdev_device_help(QemuOpts *opts) } prop_list = qmp_device_list_properties(driver, &local_err); - if (!prop_list) { + if (local_err) { error_printf("%s\n", error_get_pretty(local_err)); error_free(local_err); return 1; From a90d411e63ef29bb99b984e0fdb7796aeee1c724 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Sep 2014 14:52:58 +0200 Subject: [PATCH 27/59] aio-win32: avoid out-of-bounds access to the events array If ret is WAIT_TIMEOUT and there was an event returned by select(), we can write to a location after the end of the array. But in that case we can retry the WaitForMultipleObjects call with the same set of events, so just move the event[ret - WAIT_OBJECT_0] assignment inside the existin conditional. Reported-by: TeLeMan Signed-off-by: Paolo Bonzini Reviewed-by: TeLeMan Signed-off-by: Stefan Hajnoczi --- aio-win32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aio-win32.c b/aio-win32.c index 7daeae18f1..d81313b00b 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -335,6 +335,7 @@ bool aio_poll(AioContext *ctx, bool blocking) event = NULL; if ((DWORD) (ret - WAIT_OBJECT_0) < count) { event = events[ret - WAIT_OBJECT_0]; + events[ret - WAIT_OBJECT_0] = events[--count]; } else if (!have_select_revents) { break; } @@ -343,9 +344,6 @@ bool aio_poll(AioContext *ctx, bool blocking) blocking = false; progress |= aio_dispatch_handlers(ctx, event); - - /* Try again, but only call each handler once. */ - events[ret - WAIT_OBJECT_0] = events[--count]; } progress |= timerlistgroup_run_timers(&ctx->tlg); From e819ab22777fd455d5b130afe48c808d35ef7e93 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 14:09:56 +0800 Subject: [PATCH 28/59] block: Introduce "null" drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an analogue to Linux null_blk. It can be used for testing or benchmarking block device emulation and general block layer functionalities such as coroutines and throttling, where disk IO is not necessary or wanted. Use null-aio:// for AIO version, and null-co:// for coroutine version. [Resolved conflict with Fam's async bdrv_aio_cancel() series: 1. Drop .bdrv_aio_cancel() since it is now done by block.c 2. Rename qemu_aio_release() to qemu_aio_unref() --Stefan] Signed-off-by: Fam Zheng Reviewed-by: BenoƮt Canet Reviewed-by: Eric Blake Message-id: 1410415798-20673-2-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/Makefile.objs | 1 + block/null.c | 168 +++++++++++++++++++++++++++++++++++++++++++ qapi/block-core.json | 19 ++++- 3 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 block/null.c diff --git a/block/Makefile.objs b/block/Makefile.objs index c9c8bbbcde..d6e23a25de 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -9,6 +9,7 @@ block-obj-y += snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o +block-obj-y += null.o block-obj-y += nbd.o nbd-client.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o diff --git a/block/null.c b/block/null.c new file mode 100644 index 0000000000..828441999c --- /dev/null +++ b/block/null.c @@ -0,0 +1,168 @@ +/* + * Null block driver + * + * Authors: + * Fam Zheng + * + * Copyright (C) 2014 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "block/block_int.h" + +typedef struct { + int64_t length; +} BDRVNullState; + +static QemuOptsList runtime_opts = { + .name = "null", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "filename", + .type = QEMU_OPT_STRING, + .help = "", + }, + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "size of the null block", + }, + { /* end of list */ } + }, +}; + +static int null_file_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + QemuOpts *opts; + BDRVNullState *s = bs->opaque; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &error_abort); + s->length = + qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30); + qemu_opts_del(opts); + return 0; +} + +static void null_close(BlockDriverState *bs) +{ +} + +static int64_t null_getlength(BlockDriverState *bs) +{ + BDRVNullState *s = bs->opaque; + return s->length; +} + +static coroutine_fn int null_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) +{ + return 0; +} + +static coroutine_fn int null_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) +{ + return 0; +} + +static coroutine_fn int null_co_flush(BlockDriverState *bs) +{ + return 0; +} + +typedef struct { + BlockDriverAIOCB common; + QEMUBH *bh; +} NullAIOCB; + +static const AIOCBInfo null_aiocb_info = { + .aiocb_size = sizeof(NullAIOCB), +}; + +static void null_bh_cb(void *opaque) +{ + NullAIOCB *acb = opaque; + acb->common.cb(acb->common.opaque, 0); + qemu_bh_delete(acb->bh); + qemu_aio_unref(acb); +} + +static inline BlockDriverAIOCB *null_aio_common(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + NullAIOCB *acb; + + acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb); + qemu_bh_schedule(acb->bh); + return &acb->common; +} + +static BlockDriverAIOCB *null_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + return null_aio_common(bs, cb, opaque); +} + +static BlockDriverAIOCB *null_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + return null_aio_common(bs, cb, opaque); +} + +static BlockDriverAIOCB *null_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + return null_aio_common(bs, cb, opaque); +} + +static BlockDriver bdrv_null_co = { + .format_name = "null-co", + .protocol_name = "null-co", + .instance_size = sizeof(BDRVNullState), + + .bdrv_file_open = null_file_open, + .bdrv_close = null_close, + .bdrv_getlength = null_getlength, + + .bdrv_co_readv = null_co_readv, + .bdrv_co_writev = null_co_writev, + .bdrv_co_flush_to_disk = null_co_flush, +}; + +static BlockDriver bdrv_null_aio = { + .format_name = "null-aio", + .protocol_name = "null-aio", + .instance_size = sizeof(BDRVNullState), + + .bdrv_file_open = null_file_open, + .bdrv_close = null_close, + .bdrv_getlength = null_getlength, + + .bdrv_aio_readv = null_aio_readv, + .bdrv_aio_writev = null_aio_writev, + .bdrv_aio_flush = null_aio_flush, +}; + +static void bdrv_null_init(void) +{ + bdrv_register(&bdrv_null_co); + bdrv_register(&bdrv_null_aio); +} + +block_init(bdrv_null_init); diff --git a/qapi/block-core.json b/qapi/block-core.json index 95dcd81ed4..a886b5d3e4 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1151,7 +1151,8 @@ 'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy', 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow', - 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] } + 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum', + 'null-aio', 'null-co' ] } ## # @BlockdevOptionsBase @@ -1203,6 +1204,18 @@ { 'type': 'BlockdevOptionsFile', 'data': { 'filename': 'str' } } +## +# @BlockdevOptionsNull +# +# Driver specific block device options for the null backend. +# +# @size: #optional size of the device in bytes. +# +# Since: 2.2 +## +{ 'type': 'BlockdevOptionsNull', + 'data': { '*size': 'int' } } + ## # @BlockdevOptionsVVFAT # @@ -1503,7 +1516,9 @@ 'vhdx': 'BlockdevOptionsGenericFormat', 'vmdk': 'BlockdevOptionsGenericCOWFormat', 'vpc': 'BlockdevOptionsGenericFormat', - 'quorum': 'BlockdevOptionsQuorum' + 'quorum': 'BlockdevOptionsQuorum', + 'null-aio': 'BlockdevOptionsNull', + 'null-co': 'BlockdevOptionsNull' } } ## From e8712cea6aa34a38130c6eb412d46180f1d4fe6f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 14:09:57 +0800 Subject: [PATCH 29/59] qapi: Sort BlockdevDriver enum data list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: BenoƮt Canet Signed-off-by: Fam Zheng Reviewed-by: Eric Blake Message-id: 1410415798-20673-3-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- qapi/block-core.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index a886b5d3e4..c3eeab5dae 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1148,11 +1148,11 @@ # Since: 2.0 ## { 'enum': 'BlockdevDriver', - 'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy', - 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug', - 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow', - 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum', - 'null-aio', 'null-co' ] } + 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'cow', + 'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', + 'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels', + 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx', + 'vmdk', 'vpc', 'vvfat' ] } ## # @BlockdevOptionsBase From db866be982f970d1dfe3798465847e3fb0ae0aaf Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 11 Sep 2014 14:09:58 +0800 Subject: [PATCH 30/59] qapi: Sort items in BlockdevOptions definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Fam Zheng Reviewed-by: BenoƮt Canet Reviewed-by: Eric Blake Message-id: 1410415798-20673-4-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- qapi/block-core.json | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index c3eeab5dae..1692627e00 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1484,41 +1484,41 @@ 'discriminator': 'driver', 'data': { 'archipelago':'BlockdevOptionsArchipelago', - 'file': 'BlockdevOptionsFile', - 'host_device':'BlockdevOptionsFile', - 'host_cdrom': 'BlockdevOptionsFile', - 'host_floppy':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsFile', - 'https': 'BlockdevOptionsFile', - 'ftp': 'BlockdevOptionsFile', - 'ftps': 'BlockdevOptionsFile', - 'tftp': 'BlockdevOptionsFile', -# TODO gluster: Wait for structured options -# TODO iscsi: Wait for structured options -# TODO nbd: Should take InetSocketAddress for 'host'? -# TODO nfs: Wait for structured options -# TODO rbd: Wait for structured options -# TODO sheepdog: Wait for structured options -# TODO ssh: Should take InetSocketAddress for 'host'? - 'vvfat': 'BlockdevOptionsVVFAT', 'blkdebug': 'BlockdevOptionsBlkdebug', 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', 'cow': 'BlockdevOptionsGenericCOWFormat', 'dmg': 'BlockdevOptionsGenericFormat', + 'file': 'BlockdevOptionsFile', + 'ftp': 'BlockdevOptionsFile', + 'ftps': 'BlockdevOptionsFile', +# TODO gluster: Wait for structured options + 'host_cdrom': 'BlockdevOptionsFile', + 'host_device':'BlockdevOptionsFile', + 'host_floppy':'BlockdevOptionsFile', + 'http': 'BlockdevOptionsFile', + 'https': 'BlockdevOptionsFile', +# TODO iscsi: Wait for structured options +# TODO nbd: Should take InetSocketAddress for 'host'? +# TODO nfs: Wait for structured options + 'null-aio': 'BlockdevOptionsNull', + 'null-co': 'BlockdevOptionsNull', 'parallels': 'BlockdevOptionsGenericFormat', - 'qcow': 'BlockdevOptionsGenericCOWFormat', 'qcow2': 'BlockdevOptionsQcow2', + 'qcow': 'BlockdevOptionsGenericCOWFormat', 'qed': 'BlockdevOptionsGenericCOWFormat', + 'quorum': 'BlockdevOptionsQuorum', 'raw': 'BlockdevOptionsGenericFormat', +# TODO rbd: Wait for structured options +# TODO sheepdog: Wait for structured options +# TODO ssh: Should take InetSocketAddress for 'host'? + 'tftp': 'BlockdevOptionsFile', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', 'vmdk': 'BlockdevOptionsGenericCOWFormat', 'vpc': 'BlockdevOptionsGenericFormat', - 'quorum': 'BlockdevOptionsQuorum', - 'null-aio': 'BlockdevOptionsNull', - 'null-co': 'BlockdevOptionsNull' + 'vvfat': 'BlockdevOptionsVVFAT' } } ## From 9bf040b962f90aa2e1cef6543dfee6c96f73ef7e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 5 Sep 2014 16:07:15 +0200 Subject: [PATCH 31/59] qapi/block: Add "fatal" to BLOCK_IMAGE_CORRUPTED Not every BLOCK_IMAGE_CORRUPTED event must be fatal; for example, when reading from an image, they should generally not be. Nonetheless, even an image only read from may of course be corrupted and this can be detected during normal operation. In this case, a non-fatal event should be emitted, but the image should not be marked corrupt (in accordance to "fatal" set to false). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 1409926039-29044-2-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2-refcount.c | 1 + qapi/block-core.json | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 43665b86e7..0bd75d2fe6 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1853,6 +1853,7 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, offset, true, size, + true, &error_abort); g_free(message); diff --git a/qapi/block-core.json b/qapi/block-core.json index 1692627e00..ef7faaab5e 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1570,7 +1570,7 @@ ## # @BLOCK_IMAGE_CORRUPTED # -# Emitted when a disk image is being marked corrupt +# Emitted when a corruption has been detected in a disk image # # @device: device name # @@ -1584,13 +1584,18 @@ # @size: #optional, if the corruption resulted from an image access, this is # the access size # +# fatal: if set, the image is marked corrupt and therefore unusable after this +# event and must be repaired (Since 2.2; before, every +# BLOCK_IMAGE_CORRUPTED event was fatal) +# # Since: 1.7 ## { 'event': 'BLOCK_IMAGE_CORRUPTED', 'data': { 'device' : 'str', 'msg' : 'str', '*offset': 'int', - '*size' : 'int' } } + '*size' : 'int', + 'fatal' : 'bool' } } ## # @BLOCK_IO_ERROR From 85186ebdac7e183242deaa55d5049988de832be1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 5 Sep 2014 16:07:16 +0200 Subject: [PATCH 32/59] qcow2: Add qcow2_signal_corruption() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper function for easily marking an image corrupt (on fatal corruptions) while outputting an informative message to stderr and via QAPI. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: BenoƮt Canet Message-id: 1409926039-29044-3-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 5 +++++ 2 files changed, 53 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index 0daf25cb58..f4bbe8b273 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -31,6 +31,8 @@ #include "qapi/qmp/qerror.h" #include "qapi/qmp/qbool.h" #include "qapi/util.h" +#include "qapi/qmp/types.h" +#include "qapi-event.h" #include "trace.h" #include "qemu/option_int.h" @@ -2529,6 +2531,52 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts) return 0; } +/* + * If offset or size are negative, respectively, they will not be included in + * the BLOCK_IMAGE_CORRUPTED event emitted. + * fatal will be ignored for read-only BDS; corruptions found there will always + * be considered non-fatal. + */ +void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, + int64_t size, const char *message_format, ...) +{ + BDRVQcowState *s = bs->opaque; + char *message; + va_list ap; + + fatal = fatal && !bs->read_only; + + if (s->signaled_corruption && + (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) + { + return; + } + + va_start(ap, message_format); + message = g_strdup_vprintf(message_format, ap); + va_end(ap); + + if (fatal) { + fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " + "corruption events will be suppressed\n", message); + } else { + fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " + "corruption events will be suppressed\n", message); + } + + qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message, + offset >= 0, offset, size >= 0, size, + fatal, &error_abort); + g_free(message); + + if (fatal) { + qcow2_mark_corrupt(bs); + bs->drv = NULL; /* make BDS unusable */ + } + + s->signaled_corruption = true; +} + static QemuOptsList qcow2_create_opts = { .name = "qcow2-create-opts", .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), diff --git a/block/qcow2.h b/block/qcow2.h index 6aeb7ea90f..7b7b6a677c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -261,6 +261,7 @@ typedef struct BDRVQcowState { bool discard_passthrough[QCOW2_DISCARD_MAX]; int overlap_check; /* bitmask of Qcow2MetadataOverlap values */ + bool signaled_corruption; uint64_t incompatible_features; uint64_t compatible_features; @@ -477,6 +478,10 @@ int qcow2_mark_corrupt(BlockDriverState *bs); int qcow2_mark_consistent(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); +void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, + int64_t size, const char *message_format, ...) + GCC_FMT_ATTR(5, 6); + /* qcow2-refcount.c functions */ int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); From adb435522b86b3fca2324cb8c94e17b55ae071f1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 5 Sep 2014 16:07:17 +0200 Subject: [PATCH 33/59] qcow2: Use qcow2_signal_corruption() for overlaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new function in case of a failed overlap check. This changes output in case of corruption, so adapt iotest 060's reference output accordingly. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: BenoƮt Canet Message-id: 1409926039-29044-4-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2-refcount.c | 24 +++--------------------- tests/qemu-iotests/060.out | 10 +++++----- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0bd75d2fe6..b9d421e153 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -26,8 +26,6 @@ #include "block/block_int.h" #include "block/qcow2.h" #include "qemu/range.h" -#include "qapi/qmp/types.h" -#include "qapi-event.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, @@ -1838,27 +1836,11 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, return ret; } else if (ret > 0) { int metadata_ol_bitnr = ffs(ret) - 1; - char *message; - assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR); - fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps " - "with %s); image marked as corrupt.\n", - metadata_ol_names[metadata_ol_bitnr]); - message = g_strdup_printf("Prevented %s overwrite", - metadata_ol_names[metadata_ol_bitnr]); - qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), - message, - true, - offset, - true, - size, - true, - &error_abort); - g_free(message); - - qcow2_mark_corrupt(bs); - bs->drv = NULL; /* make BDS unusable */ + qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid " + "write on metadata (overlaps with %s)", + metadata_ol_names[metadata_ol_bitnr]); return -EIO; } diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index c27c952412..30806dae89 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -8,7 +8,7 @@ ERROR cluster 3 refcount=1 reference=3 1 errors were found on the image. Data may be corrupted, or further writes to the image may corrupt it. incompatible_features 0x0 -qcow2: Preventing invalid write on metadata (overlaps with active L1 table); image marked as corrupt. +qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L1 table); further corruption events will be suppressed write failed: Input/output error incompatible_features 0x2 qemu-io: can't open device TEST_DIR/t.IMGFMT: IMGFMT: Image is corrupt; cannot be opened read/write @@ -24,7 +24,7 @@ ERROR cluster 2 refcount=1 reference=2 2 errors were found on the image. Data may be corrupted, or further writes to the image may corrupt it. incompatible_features 0x0 -qcow2: Preventing invalid write on metadata (overlaps with refcount block); image marked as corrupt. +qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with refcount block); further corruption events will be suppressed write failed: Input/output error incompatible_features 0x2 Repairing refcount block 0 refcount=2 @@ -56,7 +56,7 @@ Data may be corrupted, or further writes to the image may corrupt it. 1 leaked clusters were found on the image. This means waste of disk space, but no harm to data. incompatible_features 0x0 -qcow2: Preventing invalid write on metadata (overlaps with inactive L2 table); image marked as corrupt. +qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with inactive L2 table); further corruption events will be suppressed write failed: Input/output error incompatible_features 0x2 Repairing cluster 4 refcount=1 reference=2 @@ -88,7 +88,7 @@ wrote 65536/65536 bytes at offset 536870912 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) discard 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Preventing invalid write on metadata (overlaps with active L2 table); image marked as corrupt. +qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L2 table); further corruption events will be suppressed blkdebug: Suspended request '0' write failed: Input/output error blkdebug: Resuming request '0' @@ -99,6 +99,6 @@ aio_write failed: No medium found Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Preventing invalid write on metadata (overlaps with qcow2_header); image marked as corrupt. +qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with qcow2_header); further corruption events will be suppressed write failed: Input/output error *** done From a97c67ee6c1546b985c1048c7a1f9e4fc13d9ee1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 5 Sep 2014 16:07:18 +0200 Subject: [PATCH 34/59] qcow2: Check L1/L2/reftable entries for alignment Offsets taken from the L1, L2 and refcount tables are generally assumed to be correctly aligned. However, this cannot be guaranteed if the image has been written to by something different than qemu, thus check all offsets taken from these tables for correct cluster alignment. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 1409926039-29044-5-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2-cluster.c | 43 ++++++++++++++++++++++++++++++++++++++--- block/qcow2-refcount.c | 44 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 735f687b06..f7dd8c0985 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -486,6 +486,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, goto out; } + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 + " unaligned (L1 index: %#" PRIx64 ")", + l2_offset, l1_index); + return -EIO; + } + /* load the l2 table in memory */ ret = l2_load(bs, l2_offset, &l2_table); @@ -508,8 +515,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, break; case QCOW2_CLUSTER_ZERO: if (s->qcow_version < 3) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - return -EIO; + qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" + " in pre-v3 image (L2 offset: %#" PRIx64 + ", L2 index: %#x)", l2_offset, l2_index); + ret = -EIO; + goto fail; } c = count_contiguous_clusters(nb_clusters, s->cluster_size, &l2_table[l2_index], QCOW_OFLAG_ZERO); @@ -525,6 +535,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, c = count_contiguous_clusters(nb_clusters, s->cluster_size, &l2_table[l2_index], QCOW_OFLAG_ZERO); *cluster_offset &= L2E_OFFSET_MASK; + if (offset_into_cluster(s, *cluster_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#" + PRIx64 " unaligned (L2 offset: %#" PRIx64 + ", L2 index: %#x)", *cluster_offset, + l2_offset, l2_index); + ret = -EIO; + goto fail; + } break; default: abort(); @@ -541,6 +559,10 @@ out: *num = nb_available - index_in_cluster; return ret; + +fail: + qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); + return ret; } /* @@ -576,6 +598,12 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, assert(l1_index < s->l1_size); l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 + " unaligned (L1 index: %#" PRIx64 ")", + l2_offset, l1_index); + return -EIO; + } /* seek the l2 table of the given l2 offset */ @@ -948,6 +976,15 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, bool offset_matches = (cluster_offset & L2E_OFFSET_MASK) == *host_offset; + if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset " + "%#llx unaligned (guest offset: %#" PRIx64 + ")", cluster_offset & L2E_OFFSET_MASK, + guest_offset); + ret = -EIO; + goto out; + } + if (*host_offset != 0 && !offset_matches) { *bytes = 0; ret = 0; @@ -979,7 +1016,7 @@ out: /* Only return a host offset if we actually made progress. Otherwise we * would make requirements for handle_alloc() that it can't fulfill */ - if (ret) { + if (ret > 0) { *host_offset = (cluster_offset & L2E_OFFSET_MASK) + offset_into_cluster(s, guest_offset); } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index b9d421e153..2bcaaf9b98 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -108,6 +108,13 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index) if (!refcount_block_offset) return 0; + if (offset_into_cluster(s, refcount_block_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64 + " unaligned (reftable index: %#" PRIx64 ")", + refcount_block_offset, refcount_table_index); + return -EIO; + } + ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, (void**) &refcount_block); if (ret < 0) { @@ -181,6 +188,14 @@ static int alloc_refcount_block(BlockDriverState *bs, /* If it's already there, we're done */ if (refcount_block_offset) { + if (offset_into_cluster(s, refcount_block_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" + PRIx64 " unaligned (reftable index: " + "%#x)", refcount_block_offset, + refcount_table_index); + return -EIO; + } + return load_refcount_block(bs, refcount_block_offset, (void**) refcount_block); } @@ -836,8 +851,14 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, case QCOW2_CLUSTER_NORMAL: case QCOW2_CLUSTER_ZERO: if (l2_entry & L2E_OFFSET_MASK) { - qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, - nb_clusters << s->cluster_bits, type); + if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, false, -1, -1, + "Cannot free unaligned cluster %#llx", + l2_entry & L2E_OFFSET_MASK); + } else { + qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, + nb_clusters << s->cluster_bits, type); + } } break; case QCOW2_CLUSTER_UNALLOCATED: @@ -901,6 +922,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, old_l2_offset = l2_offset; l2_offset &= L1E_OFFSET_MASK; + if (offset_into_cluster(s, l2_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" + PRIx64 " unaligned (L1 index: %#x)", + l2_offset, i); + ret = -EIO; + goto fail; + } + ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) &l2_table); if (ret < 0) { @@ -933,6 +962,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, case QCOW2_CLUSTER_NORMAL: case QCOW2_CLUSTER_ZERO: + if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, true, -1, -1, "Data " + "cluster offset %#llx " + "unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", + offset & L2E_OFFSET_MASK, + l2_offset, j); + ret = -EIO; + goto fail; + } + cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; if (!cluster_index) { /* unallocated */ From 5b0ed2be883238f52567ba2635ea38f34e8eb90d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 5 Sep 2014 16:07:19 +0200 Subject: [PATCH 35/59] iotests: Add more tests for qcow2 corruption Add tests for unaligned L1/L2/reftable entries and non-fatal corruption reports. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 1409926039-29044-6-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/060 | 56 ++++++++++++++++++++++++++++++++++++-- tests/qemu-iotests/060.out | 51 ++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index 830386fdaa..2355567951 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -169,9 +169,61 @@ echo "=== Testing unallocated image header ===" echo _make_test_img 64M # Create L1/L2 -$QEMU_IO -c "$OPEN_RW" -c "write 0 64k" | _filter_qemu_io +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io poke_file "$TEST_IMG" "$rb_offset" "\x00\x00" -$QEMU_IO -c "$OPEN_RW" -c "write 64k 64k" | _filter_qemu_io +$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "=== Testing unaligned L1 entry ===" +echo +_make_test_img 64M +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io +# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are +# aligned or not does not matter +poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00" +$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "=== Testing unaligned L2 entry ===" +echo +_make_test_img 64M +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io +poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" +$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "=== Testing unaligned reftable entry ===" +echo +_make_test_img 64M +poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00" +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "=== Testing non-fatal corruption on freeing ===" +echo +_make_test_img 64M +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io +poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" +$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io + +echo +echo "=== Testing read-only corruption report ===" +echo +_make_test_img 64M +$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io +poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" +# Should only emit a single error message +$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io + +echo +echo "=== Testing non-fatal and then fatal corruption report ===" +echo +_make_test_img 64M +$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io +poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00" +poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00" +# Should emit two error messages +$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io # success, all done echo "*** done" diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 30806dae89..4f0c6d0c8e 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -101,4 +101,55 @@ wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with qcow2_header); further corruption events will be suppressed write failed: Input/output error + +=== Testing unaligned L1 entry === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Marking image as corrupt: L2 table offset 0x42a00 unaligned (L1 index: 0); further corruption events will be suppressed +read failed: Input/output error + +=== Testing unaligned L2 entry === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +read failed: Input/output error + +=== Testing unaligned reftable entry === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qcow2: Marking image as corrupt: Refblock offset 0x22a00 unaligned (reftable index: 0); further corruption events will be suppressed +write failed: Input/output error + +=== Testing non-fatal corruption on freeing === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed +discard 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Testing read-only corruption report === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Image is corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed +read failed: Input/output error +read failed: Input/output error + +=== Testing non-fatal and then fatal corruption report === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed +qcow2: Marking image as corrupt: Data cluster offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +discard 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read failed: Input/output error *** done From 407ba0844d90a344e5ed012129e6e40e091bf48b Mon Sep 17 00:00:00 2001 From: Maria Kustova Date: Sun, 14 Sep 2014 16:07:02 +0400 Subject: [PATCH 36/59] image-fuzzer: Trivial readability and formatting improvements Signed-off-by: Maria Kustova Signed-off-by: Stefan Hajnoczi --- tests/image-fuzzer/qcow2/fuzz.py | 10 +++---- tests/image-fuzzer/runner.py | 49 ++++++++++++++++---------------- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/tests/image-fuzzer/qcow2/fuzz.py b/tests/image-fuzzer/qcow2/fuzz.py index 57527f9b4a..404b439f48 100644 --- a/tests/image-fuzzer/qcow2/fuzz.py +++ b/tests/image-fuzzer/qcow2/fuzz.py @@ -332,9 +332,8 @@ def l1_entry(current): constraints = UINT64_V # Reserved bits are ignored # Added a possibility when only flags are fuzzed - offset = 0x7fffffffffffffff & random.choice([selector(current, - constraints), - current]) + offset = 0x7fffffffffffffff & \ + random.choice([selector(current, constraints), current]) is_cow = random.randint(0, 1) return offset + (is_cow << UINT64_M) @@ -344,9 +343,8 @@ def l2_entry(current): constraints = UINT64_V # Reserved bits are ignored # Add a possibility when only flags are fuzzed - offset = 0x3ffffffffffffffe & random.choice([selector(current, - constraints), - current]) + offset = 0x3ffffffffffffffe & \ + random.choice([selector(current, constraints), current]) is_compressed = random.randint(0, 1) is_cow = random.randint(0, 1) is_zero = random.randint(0, 1) diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py index c903c8a342..b7002905e1 100755 --- a/tests/image-fuzzer/runner.py +++ b/tests/image-fuzzer/runner.py @@ -70,7 +70,7 @@ def run_app(fd, q_args): """Exception for signal.alarm events.""" pass - def handler(*arg): + def handler(*args): """Notify that an alarm event occurred.""" raise Alarm @@ -134,8 +134,8 @@ class TestEnv(object): self.init_path = os.getcwd() self.work_dir = work_dir self.current_dir = os.path.join(work_dir, 'test-' + test_id) - self.qemu_img = os.environ.get('QEMU_IMG', 'qemu-img')\ - .strip().split(' ') + self.qemu_img = \ + os.environ.get('QEMU_IMG', 'qemu-img').strip().split(' ') self.qemu_io = os.environ.get('QEMU_IO', 'qemu-io').strip().split(' ') self.commands = [['qemu-img', 'check', '-f', 'qcow2', '$test_img'], ['qemu-img', 'info', '-f', 'qcow2', '$test_img'], @@ -212,10 +212,8 @@ class TestEnv(object): os.chdir(self.current_dir) backing_file_name, backing_file_fmt = self._create_backing_file() - img_size = image_generator.create_image('test.img', - backing_file_name, - backing_file_fmt, - fuzz_config) + img_size = image_generator.create_image( + 'test.img', backing_file_name, backing_file_fmt, fuzz_config) for item in commands: shutil.copy('test.img', 'copy.img') # 'off' and 'len' are multiple of the sector size @@ -228,7 +226,7 @@ class TestEnv(object): elif item[0] == 'qemu-io': current_cmd = list(self.qemu_io) else: - multilog("Warning: test command '%s' is not defined.\n" \ + multilog("Warning: test command '%s' is not defined.\n" % item[0], sys.stderr, self.log, self.parent_log) continue # Replace all placeholders with their real values @@ -244,29 +242,28 @@ class TestEnv(object): "Backing file: %s\n" \ % (self.seed, " ".join(current_cmd), self.current_dir, backing_file_name) - temp_log = StringIO.StringIO() try: retcode = run_app(temp_log, current_cmd) except OSError, e: - multilog(test_summary + "Error: Start of '%s' failed. " \ - "Reason: %s\n\n" % (os.path.basename( - current_cmd[0]), e[1]), + multilog("%sError: Start of '%s' failed. Reason: %s\n\n" + % (test_summary, os.path.basename(current_cmd[0]), + e[1]), sys.stderr, self.log, self.parent_log) raise TestException if retcode < 0: self.log.write(temp_log.getvalue()) - multilog(test_summary + "FAIL: Test terminated by signal " + - "%s\n\n" % str_signal(-retcode), sys.stderr, self.log, - self.parent_log) + multilog("%sFAIL: Test terminated by signal %s\n\n" + % (test_summary, str_signal(-retcode)), + sys.stderr, self.log, self.parent_log) self.failed = True else: if self.log_all: self.log.write(temp_log.getvalue()) - multilog(test_summary + "PASS: Application exited with" + - " the code '%d'\n\n" % retcode, sys.stdout, - self.log, self.parent_log) + multilog("%sPASS: Application exited with the code " \ + "'%d'\n\n" % (test_summary, retcode), + sys.stdout, self.log, self.parent_log) temp_log.close() os.remove('copy.img') @@ -286,8 +283,9 @@ if __name__ == '__main__': Set up test environment in TEST_DIR and run a test in it. A module for test image generation should be specified via IMG_GENERATOR. + Example: - runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2 + runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2 Optional arguments: -h, --help display this help and exit @@ -305,20 +303,22 @@ if __name__ == '__main__': '--command' accepts a JSON array of commands. Each command presents an application under test with all its paramaters as a list of strings, - e.g. - ["qemu-io", "$test_img", "-c", "write $off $len"] + e.g. ["qemu-io", "$test_img", "-c", "write $off $len"]. Supported application aliases: 'qemu-img' and 'qemu-io'. + Supported argument aliases: $test_img for the fuzzed image, $off for an offset, $len for length. Values for $off and $len will be generated based on the virtual disk - size of the fuzzed image + size of the fuzzed image. + Paths to 'qemu-img' and 'qemu-io' are retrevied from 'QEMU_IMG' and - 'QEMU_IO' environment variables + 'QEMU_IO' environment variables. '--config' accepts a JSON array of fields to be fuzzed, e.g. - '[["header"], ["header", "version"]]' + '[["header"], ["header", "version"]]'. + Each of the list elements can consist of a complex image element only as ["header"] or ["feature_name_table"] or an exact field as ["header", "version"]. In the first case random portion of the element @@ -368,7 +368,6 @@ if __name__ == '__main__': seed = None config = None duration = None - for opt, arg in opts: if opt in ('-h', '--help'): usage() From 93bb1315250dd010e65dc067af103cbaf0de03ae Mon Sep 17 00:00:00 2001 From: Gonglei Date: Tue, 16 Sep 2014 21:36:55 +0800 Subject: [PATCH 37/59] hmp: fix memory leak at hmp_info_block_jobs() Signed-off-by: Gonglei Reviewed-by: Markus Armbruster Message-id: 1410874615-14292-1-git-send-email-arei.gonglei@huawei.com Signed-off-by: Stefan Hajnoczi --- hmp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hmp.c b/hmp.c index 40a90dae70..31fb6a15ca 100644 --- a/hmp.c +++ b/hmp.c @@ -679,6 +679,8 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict) } list = list->next; } + + qapi_free_BlockJobInfoList(list); } void hmp_info_tpm(Monitor *mon, const QDict *qdict) From 7b17ce60cc58b3c20b3e708a2d69f6bbe2b4edfa Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 20 Aug 2014 19:59:33 +0200 Subject: [PATCH 38/59] qcow2: Fix leak of QemuOpts in qcow2_open() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the QemuOpts object opts is leaked if anything fails from its creation up to and including the image repair block. Fix this by freeing that object in the fail path. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: BenoƮt Canet Message-id: 1408557576-14574-2-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index f4bbe8b273..744e0db7fc 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -538,7 +538,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, unsigned int len, i; int ret = 0; QCowHeader header; - QemuOpts *opts; + QemuOpts *opts = NULL; Error *local_err = NULL; uint64_t ext_end; uint64_t l1_vm_state_index; @@ -935,7 +935,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, error_setg(errp, "Unsupported value '%s' for qcow2 option " "'overlap-check'. Allowed are either of the following: " "none, constant, cached, all", opt_overlap_check); - qemu_opts_del(opts); ret = -EINVAL; goto fail; } @@ -950,6 +949,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } qemu_opts_del(opts); + opts = NULL; if (s->use_lazy_refcounts && s->qcow_version < 3) { error_setg(errp, "Lazy refcounts require a qcow2 image with at least " @@ -967,6 +967,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, return ret; fail: + qemu_opts_del(opts); g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); qcow2_free_snapshots(bs); From e775ba7721181e3ff6677b91f0464968330988d9 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 20 Aug 2014 19:59:34 +0200 Subject: [PATCH 39/59] qapi: Allow enums in anonymous unions Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 1408557576-14574-3-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- scripts/qapi-types.py | 2 ++ scripts/qapi-visit.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index b4632324a7..d2f815bca2 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -177,6 +177,8 @@ const int %(name)s_qtypes[QTYPE_MAX] = { qtype = "QTYPE_QDICT" elif find_union(qapi_type): qtype = "QTYPE_QDICT" + elif find_enum(qapi_type): + qtype = "QTYPE_QSTRING" else: assert False, "Invalid anonymous union member" diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index c12969721a..df9f7fb657 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -263,7 +263,8 @@ void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error **e for key in members: assert (members[key] in builtin_types or find_struct(members[key]) - or find_union(members[key])), "Invalid anonymous union member" + or find_union(members[key]) + or find_enum(members[key])), "Invalid anonymous union member" enum_full_value = generate_enum_full_value(disc_type, key) ret += mcgen(''' From ee42b5ce0b33986970f5c2fd4b676cddc0d6306c Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 20 Aug 2014 19:59:35 +0200 Subject: [PATCH 40/59] qcow2: Add overlap-check.template option Being able to set the overlap-check option to a string and then refine it via the overlap-check.* options is a nice idea for the command line but does not work so well for non-flattened dicts. In that case, one can only specify either but not both, so add a field to overlap-check.* which does the same as directly specifying overlap-check but can be used in conjunction with the other fields in non-flattened dicts. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 1408557576-14574-4-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 24 ++++++++++++++++++++++-- block/qcow2.h | 1 + 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 744e0db7fc..778fc1ec13 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -405,6 +405,12 @@ static QemuOptsList qcow2_runtime_opts = { .help = "Selects which overlap checks to perform from a range of " "templates (none, constant, cached, all)", }, + { + .name = QCOW2_OPT_OVERLAP_TEMPLATE, + .type = QEMU_OPT_STRING, + .help = "Selects which overlap checks to perform from a range of " + "templates (none, constant, cached, all)", + }, { .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, .type = QEMU_OPT_BOOL, @@ -542,7 +548,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; uint64_t ext_end; uint64_t l1_vm_state_index; - const char *opt_overlap_check; + const char *opt_overlap_check, *opt_overlap_check_template; int overlap_check_template = 0; uint64_t l2_cache_size, refcount_cache_size; @@ -922,7 +928,21 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, s->discard_passthrough[QCOW2_DISCARD_OTHER] = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); - opt_overlap_check = qemu_opt_get(opts, "overlap-check") ?: "cached"; + opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); + opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); + if (opt_overlap_check_template && opt_overlap_check && + strcmp(opt_overlap_check_template, opt_overlap_check)) + { + error_setg(errp, "Conflicting values for qcow2 options '" + QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE + "' ('%s')", opt_overlap_check, opt_overlap_check_template); + ret = -EINVAL; + goto fail; + } + if (!opt_overlap_check) { + opt_overlap_check = opt_overlap_check_template ?: "cached"; + } + if (!strcmp(opt_overlap_check, "none")) { overlap_check_template = 0; } else if (!strcmp(opt_overlap_check, "constant")) { diff --git a/block/qcow2.h b/block/qcow2.h index 7b7b6a677c..7d61e615ff 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -83,6 +83,7 @@ #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other" #define QCOW2_OPT_OVERLAP "overlap-check" +#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template" #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header" #define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1" #define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2" From f658581130878905e7af001286b8514f28d23c43 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 20 Aug 2014 19:59:36 +0200 Subject: [PATCH 41/59] qapi/block-core: Add "new" qcow2 options qcow2 supports more than four options by now, add the new options (overlap check mode and metadata cache size) Signed-off-by: Max Reitz Reviewed-by: Eric Blake Message-id: 1408557576-14574-5-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- qapi/block-core.json | 79 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index ef7faaab5e..85dc87d6c7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1263,6 +1263,67 @@ 'base': 'BlockdevOptionsGenericFormat', 'data': { '*backing': 'BlockdevRef' } } +## +# @Qcow2OverlapCheckMode +# +# General overlap check modes. +# +# @none: Do not perform any checks +# +# @constant: Perform only checks which can be done in constant time and +# without reading anything from disk +# +# @cached: Perform only checks which can be done without reading anything +# from disk +# +# @all: Perform all available overlap checks +# +# Since: 2.2 +## +{ 'enum': 'Qcow2OverlapCheckMode', + 'data': [ 'none', 'constant', 'cached', 'all' ] } + +## +# @Qcow2OverlapCheckFlags +# +# Structure of flags for each metadata structure. Setting a field to 'true' +# makes qemu guard that structure against unintended overwriting. The default +# value is chosen according to the template given. +# +# @template: Specifies a template mode which can be adjusted using the other +# flags, defaults to 'cached' +# +# Since: 2.2 +## +{ 'type': 'Qcow2OverlapCheckFlags', + 'data': { '*template': 'Qcow2OverlapCheckMode', + '*main-header': 'bool', + '*active-l1': 'bool', + '*active-l2': 'bool', + '*refcount-table': 'bool', + '*refcount-block': 'bool', + '*snapshot-table': 'bool', + '*inactive-l1': 'bool', + '*inactive-l2': 'bool' } } + +## +# @Qcow2OverlapChecks +# +# Specifies which metadata structures should be guarded against unintended +# overwriting. +# +# @flags: set of flags for separate specification of each metadata structure +# type +# +# @mode: named mode which chooses a specific set of flags +# +# Since: 2.2 +## +{ 'union': 'Qcow2OverlapChecks', + 'discriminator': {}, + 'data': { 'flags': 'Qcow2OverlapCheckFlags', + 'mode': 'Qcow2OverlapCheckMode' } } + ## # @BlockdevOptionsQcow2 # @@ -1282,6 +1343,18 @@ # should be issued on other occasions where a cluster # gets freed # +# @overlap-check: #optional which overlap checks to perform for writes +# to the image, defaults to 'cached' (since 2.2) +# +# @cache-size: #optional the maximum total size of the L2 table and +# refcount block caches in bytes (since 2.2) +# +# @l2-cache-size: #optional the maximum size of the L2 table cache in +# bytes (since 2.2) +# +# @refcount-cache-size: #optional the maximum size of the refcount block cache +# in bytes (since 2.2) +# # Since: 1.7 ## { 'type': 'BlockdevOptionsQcow2', @@ -1289,7 +1362,11 @@ 'data': { '*lazy-refcounts': 'bool', '*pass-discard-request': 'bool', '*pass-discard-snapshot': 'bool', - '*pass-discard-other': 'bool' } } + '*pass-discard-other': 'bool', + '*overlap-check': 'Qcow2OverlapChecks', + '*cache-size': 'int', + '*l2-cache-size': 'int', + '*refcount-cache-size': 'int' } } ## From 56271efdeaa9d01cff9d82c4b8b2ab73152fe1ea Mon Sep 17 00:00:00 2001 From: Maria Kustova Date: Tue, 19 Aug 2014 16:25:11 +0400 Subject: [PATCH 42/59] docs: List all image elements currently supported by the fuzzer Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Signed-off-by: Maria Kustova Message-id: cb71485d0f55d1d8401eebaead8324eb78673060.1408450493.git.maria.k@catit.be Signed-off-by: Stefan Hajnoczi --- docs/image-fuzzer.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/image-fuzzer.txt b/docs/image-fuzzer.txt index 0d0005d34a..3e23ebec33 100644 --- a/docs/image-fuzzer.txt +++ b/docs/image-fuzzer.txt @@ -125,7 +125,8 @@ If a fuzzer configuration is specified, then it has the next interpretation: will be always fuzzed for every test. This case is useful for regression testing. -For now only header fields, header extensions and L1/L2 tables are generated. +The generator can create header fields, header extensions, L1/L2 tables and +refcount table and blocks. Module interfaces ----------------- From 2e5be6b77ef428df9088a5d01586cd0cd0b1a107 Mon Sep 17 00:00:00 2001 From: Maria Kustova Date: Tue, 19 Aug 2014 16:25:12 +0400 Subject: [PATCH 43/59] fuzz: Add fuzzing functions for entries of refcount table and blocks Reviewed-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Signed-off-by: Maria Kustova Message-id: c9f4027b6f401c67e9d18f94aed29be445e81d48.1408450493.git.maria.k@catit.be Signed-off-by: Stefan Hajnoczi --- tests/image-fuzzer/qcow2/fuzz.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/image-fuzzer/qcow2/fuzz.py b/tests/image-fuzzer/qcow2/fuzz.py index 404b439f48..20eba6bc1b 100644 --- a/tests/image-fuzzer/qcow2/fuzz.py +++ b/tests/image-fuzzer/qcow2/fuzz.py @@ -18,8 +18,8 @@ import random - UINT8 = 0xff +UINT16 = 0xffff UINT32 = 0xffffffff UINT64 = 0xffffffffffffffff # Most significant bit orders @@ -28,6 +28,8 @@ UINT64_M = 63 # Fuzz vectors UINT8_V = [0, 0x10, UINT8/4, UINT8/2 - 1, UINT8/2, UINT8/2 + 1, UINT8 - 1, UINT8] +UINT16_V = [0, 0x100, 0x1000, UINT16/4, UINT16/2 - 1, UINT16/2, UINT16/2 + 1, + UINT16 - 1, UINT16] UINT32_V = [0, 0x100, 0x1000, 0x10000, 0x100000, UINT32/4, UINT32/2 - 1, UINT32/2, UINT32/2 + 1, UINT32 - 1, UINT32] UINT64_V = UINT32_V + [0x1000000, 0x10000000, 0x100000000, UINT64/4, @@ -351,3 +353,15 @@ def l2_entry(current): value = offset + (is_cow << UINT64_M) + \ (is_compressed << UINT64_M - 1) + is_zero return value + + +def refcount_table_entry(current): + """Fuzz an entry of the refcount table.""" + constraints = UINT64_V + return selector(current, constraints) + + +def refcount_block_entry(current): + """Fuzz an entry of a refcount block.""" + constraints = UINT16_V + return selector(current, constraints) From 1e8fd8d44d6d5d5fa6d7583ac796809886abbd69 Mon Sep 17 00:00:00 2001 From: Maria Kustova Date: Tue, 19 Aug 2014 16:25:13 +0400 Subject: [PATCH 44/59] layout: Add generators for refcount table and blocks Refcount structures are placed in clusters randomly selected from all unallocated host clusters. Reviewed-by: Stefan Hajnoczi Signed-off-by: Maria Kustova Reviewed-by: Fam Zheng Message-id: 7e2f38608db6fba2da53997390b19400d445c45d.1408450493.git.maria.k@catit.be Signed-off-by: Stefan Hajnoczi --- tests/image-fuzzer/qcow2/layout.py | 138 ++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) diff --git a/tests/image-fuzzer/qcow2/layout.py b/tests/image-fuzzer/qcow2/layout.py index 730c771d3c..63e801f4e8 100644 --- a/tests/image-fuzzer/qcow2/layout.py +++ b/tests/image-fuzzer/qcow2/layout.py @@ -102,6 +102,8 @@ class Image(object): self.end_of_extension_area = FieldsList() self.l2_tables = FieldsList() self.l1_table = FieldsList() + self.refcount_table = FieldsList() + self.refcount_blocks = FieldsList() self.ext_offset = 0 self.create_header(cluster_bits, backing_file_name) self.set_backing_file_name(backing_file_name) @@ -113,7 +115,8 @@ class Image(object): def __iter__(self): return chain(self.header, self.backing_file_format, self.feature_name_table, self.end_of_extension_area, - self.backing_file_name, self.l1_table, self.l2_tables) + self.backing_file_name, self.l1_table, self.l2_tables, + self.refcount_table, self.refcount_blocks) def create_header(self, cluster_bits, backing_file_name=None): """Generate a random valid header.""" @@ -330,6 +333,138 @@ class Image(object): float(self.cluster_size**2))) self.header['l1_table_offset'][0].value = l1_offset + def create_refcount_structures(self): + """Generate random refcount blocks and refcount table.""" + def allocate_rfc_blocks(data, size): + """Return indices of clusters allocated for refcount blocks.""" + cluster_ids = set() + diff = block_ids = set([x / size for x in data]) + while len(diff) != 0: + # Allocate all yet not allocated clusters + new = self._get_available_clusters(data | cluster_ids, + len(diff)) + # Indices of new refcount blocks necessary to cover clusters + # in 'new' + diff = set([x / size for x in new]) - block_ids + cluster_ids |= new + block_ids |= diff + return cluster_ids, block_ids + + def allocate_rfc_table(data, init_blocks, block_size): + """Return indices of clusters allocated for the refcount table + and updated indices of clusters allocated for blocks and indices + of blocks. + """ + blocks = set(init_blocks) + clusters = set() + # Number of entries in one cluster of the refcount table + size = self.cluster_size / UINT64_S + # Number of clusters necessary for the refcount table based on + # the current number of refcount blocks + table_size = int(ceil((max(blocks) + 1) / float(size))) + # Index of the first cluster of the refcount table + table_start = self._get_adjacent_clusters(data, table_size + 1) + # Clusters allocated for the current length of the refcount table + table_clusters = set(range(table_start, table_start + table_size)) + # Clusters allocated for the refcount table including + # last optional one for potential l1 growth + table_clusters_allocated = set(range(table_start, table_start + + table_size + 1)) + # New refcount blocks necessary for clusters occupied by the + # refcount table + diff = set([c / block_size for c in table_clusters]) - blocks + blocks |= diff + while len(diff) != 0: + # Allocate clusters for new refcount blocks + new = self._get_available_clusters((data | clusters) | + table_clusters_allocated, + len(diff)) + # Indices of new refcount blocks necessary to cover + # clusters in 'new' + diff = set([x / block_size for x in new]) - blocks + clusters |= new + blocks |= diff + # Check if the refcount table needs one more cluster + if int(ceil((max(blocks) + 1) / float(size))) > table_size: + new_block_id = (table_start + table_size) / block_size + # Check if the additional table cluster needs + # one more refcount block + if new_block_id not in blocks: + diff.add(new_block_id) + table_clusters.add(table_start + table_size) + table_size += 1 + return table_clusters, blocks, clusters + + def create_table_entry(table_offset, block_cluster, block_size, + cluster): + """Generate a refcount table entry.""" + offset = table_offset + UINT64_S * (cluster / block_size) + return ['>Q', offset, block_cluster * self.cluster_size, + 'refcount_table_entry'] + + def create_block_entry(block_cluster, block_size, cluster): + """Generate a list of entries for the current block.""" + entry_size = self.cluster_size / block_size + offset = block_cluster * self.cluster_size + entry_offset = offset + entry_size * (cluster % block_size) + # While snapshots are not supported all refcounts are set to 1 + return ['>H', entry_offset, 1, 'refcount_block_entry'] + # Size of a block entry in bits + refcount_bits = 1 << self.header['refcount_order'][0].value + # Number of refcount entries per refcount block + # Convert self.cluster_size from bytes to bits to have the same + # base for the numerator and denominator + block_size = self.cluster_size * 8 / refcount_bits + meta_data = self._get_metadata() + if len(self.data_clusters) == 0: + # All metadata for an empty guest image needs 4 clusters: + # header, rfc table, rfc block, L1 table. + # Header takes cluster #0, other clusters ##1-3 can be used + block_clusters = set([random.choice(list(set(range(1, 4)) - + meta_data))]) + block_ids = set([0]) + table_clusters = set([random.choice(list(set(range(1, 4)) - + meta_data - + block_clusters))]) + else: + block_clusters, block_ids = \ + allocate_rfc_blocks(self.data_clusters | + meta_data, block_size) + table_clusters, block_ids, new_clusters = \ + allocate_rfc_table(self.data_clusters | + meta_data | + block_clusters, + block_ids, + block_size) + block_clusters |= new_clusters + + meta_data |= block_clusters | table_clusters + table_offset = min(table_clusters) * self.cluster_size + block_id = None + # Clusters allocated for refcount blocks + block_clusters = list(block_clusters) + # Indices of refcount blocks + block_ids = list(block_ids) + # Refcount table entries + rfc_table = [] + # Refcount entries + rfc_blocks = [] + + for cluster in sorted(self.data_clusters | meta_data): + if cluster / block_size != block_id: + block_id = cluster / block_size + block_cluster = block_clusters[block_ids.index(block_id)] + rfc_table.append(create_table_entry(table_offset, + block_cluster, + block_size, cluster)) + rfc_blocks.append(create_block_entry(block_cluster, block_size, + cluster)) + self.refcount_table = FieldsList(rfc_table) + self.refcount_blocks = FieldsList(rfc_blocks) + + self.header['refcount_table_offset'][0].value = table_offset + self.header['refcount_table_clusters'][0].value = len(table_clusters) + def fuzz(self, fields_to_fuzz=None): """Fuzz an image by corrupting values of a random subset of its fields. @@ -471,6 +606,7 @@ def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None, image.create_feature_name_table() image.set_end_of_extension_area() image.create_l_structures() + image.create_refcount_structures() image.fuzz(fields_to_fuzz) image.write(test_img_path) return image.image_size From 1cd1031ddc3bdef68acbbdd0d010c09279c727ea Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:32 -0400 Subject: [PATCH 45/59] ahci: Adding basic functionality qtest. Currently, there is no qtest to test the functionality of the AHCI functionality present within the Q35 machine type. This patch adds a skeleton for an AHCI test suite, and adds a simple sanity-check test case where we identify that the AHCI device is present, then disengage the virtual machine. Signed-off-by: John Snow Message-id: 1408643079-30675-2-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/Makefile | 2 + tests/ahci-test.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 tests/ahci-test.c diff --git a/tests/Makefile b/tests/Makefile index a5e3d0c369..f5de29c0b9 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -132,6 +132,7 @@ check-qtest-i386-y = tests/endianness-test$(EXESUF) check-qtest-i386-y += tests/fdc-test$(EXESUF) gcov-files-i386-y = hw/block/fdc.c check-qtest-i386-y += tests/ide-test$(EXESUF) +check-qtest-i386-y += tests/ahci-test$(EXESUF) check-qtest-i386-y += tests/hd-geo-test$(EXESUF) gcov-files-i386-y += hw/block/hd-geometry.c check-qtest-i386-y += tests/boot-order-test$(EXESUF) @@ -307,6 +308,7 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) tests/fdc-test$(EXESUF): tests/fdc-test.o tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) +tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o $(libqos-obj-y) diff --git a/tests/ahci-test.c b/tests/ahci-test.c new file mode 100644 index 0000000000..cc49dba30f --- /dev/null +++ b/tests/ahci-test.c @@ -0,0 +1,196 @@ +/* + * AHCI test cases + * + * Copyright (c) 2014 John Snow + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#include + +#include "libqtest.h" +#include "libqos/pci-pc.h" +#include "libqos/malloc-pc.h" + +#include "qemu-common.h" +#include "qemu/host-utils.h" + +#include "hw/pci/pci_ids.h" +#include "hw/pci/pci_regs.h" + +/* Test-specific defines. */ +#define TEST_IMAGE_SIZE (64 * 1024 * 1024) + +/*** Supplementary PCI Config Space IDs & Masks ***/ +#define PCI_DEVICE_ID_INTEL_Q35_AHCI (0x2922) + +/*** Globals ***/ +static QGuestAllocator *guest_malloc; +static QPCIBus *pcibus; +static char tmp_path[] = "/tmp/qtest.XXXXXX"; + +/*** Function Declarations ***/ +static QPCIDevice *get_ahci_device(void); +static void free_ahci_device(QPCIDevice *dev); + +/*** Utilities ***/ + +/** + * Locate, verify, and return a handle to the AHCI device. + */ +static QPCIDevice *get_ahci_device(void) +{ + QPCIDevice *ahci; + uint16_t vendor_id, device_id; + + pcibus = qpci_init_pc(); + + /* Find the AHCI PCI device and verify it's the right one. */ + ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02)); + g_assert(ahci != NULL); + + vendor_id = qpci_config_readw(ahci, PCI_VENDOR_ID); + device_id = qpci_config_readw(ahci, PCI_DEVICE_ID); + + g_assert_cmphex(vendor_id, ==, PCI_VENDOR_ID_INTEL); + g_assert_cmphex(device_id, ==, PCI_DEVICE_ID_INTEL_Q35_AHCI); + + return ahci; +} + +static void free_ahci_device(QPCIDevice *ahci) +{ + /* libqos doesn't have a function for this, so free it manually */ + g_free(ahci); + + if (pcibus) { + qpci_free_pc(pcibus); + pcibus = NULL; + } +} + +/*** Test Setup & Teardown ***/ + +/** + * Launch QEMU with the given command line, + * and then set up interrupts and our guest malloc interface. + */ +static void qtest_boot(const char *cmdline_fmt, ...) +{ + va_list ap; + char *cmdline; + + va_start(ap, cmdline_fmt); + cmdline = g_strdup_vprintf(cmdline_fmt, ap); + va_end(ap); + + qtest_start(cmdline); + qtest_irq_intercept_in(global_qtest, "ioapic"); + guest_malloc = pc_alloc_init(); + + g_free(cmdline); +} + +/** + * Tear down the QEMU instance. + */ +static void qtest_shutdown(void) +{ + g_free(guest_malloc); + guest_malloc = NULL; + qtest_end(); +} + +/** + * Start a Q35 machine and bookmark a handle to the AHCI device. + */ +static QPCIDevice *ahci_boot(void) +{ + qtest_boot("-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s" + " -M q35 " + "-device ide-hd,drive=drive0 " + "-global ide-hd.ver=%s", + tmp_path, "testdisk", "version"); + + /* Verify that we have an AHCI device present. */ + return get_ahci_device(); +} + +/** + * Clean up the PCI device, then terminate the QEMU instance. + */ +static void ahci_shutdown(QPCIDevice *ahci) +{ + free_ahci_device(ahci); + qtest_shutdown(); +} + +/******************************************************************************/ +/* Test Interfaces */ +/******************************************************************************/ + +/** + * Basic sanity test to boot a machine, find an AHCI device, and shutdown. + */ +static void test_sanity(void) +{ + QPCIDevice *ahci; + ahci = ahci_boot(); + ahci_shutdown(ahci); +} + +/******************************************************************************/ + +int main(int argc, char **argv) +{ + const char *arch; + int fd; + int ret; + + /* Should be first to utilize g_test functionality, So we can see errors. */ + g_test_init(&argc, &argv, NULL); + + /* Check architecture */ + arch = qtest_get_arch(); + if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) { + g_test_message("Skipping test for non-x86"); + return 0; + } + + /* Create a temporary raw image */ + fd = mkstemp(tmp_path); + g_assert(fd >= 0); + ret = ftruncate(fd, TEST_IMAGE_SIZE); + g_assert(ret == 0); + close(fd); + + /* Run the tests */ + qtest_add_func("/ahci/sanity", test_sanity); + + ret = g_test_run(); + + /* Cleanup */ + unlink(tmp_path); + + return ret; +} From c8b5b20f81030f8f40be041ccc23021ac602c468 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:33 -0400 Subject: [PATCH 46/59] ahci: MSI capability should be at 0x80, not 0x50. In the Intel ICH9 data sheet, the MSI capability offset in the PCI configuration space for ICH9 AHCI devices is specified to be 0x80. Further, the PCI capability pointer should always point to 0x80 in ICH9 devices, despite the fact that AHCI 1.3 specifies that it should be pointing to PMCAP (Which in this instance would be 0x70) to maintain adherence to the Intel data sheet specifications and real observed behavior. Signed-off-by: John Snow Message-id: 1408643079-30675-3-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- hw/ide/ich.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/ide/ich.c b/hw/ide/ich.c index a2f1639310..8eb77a1472 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -71,6 +71,7 @@ #include #include +#define ICH9_MSI_CAP_OFFSET 0x80 #define ICH9_SATA_CAP_OFFSET 0xA8 #define ICH9_IDP_BAR 4 @@ -115,7 +116,6 @@ static int pci_ich9_ahci_init(PCIDevice *dev) /* XXX Software should program this register */ dev->config[0x90] = 1 << 6; /* Address Map Register - AHCI mode */ - msi_init(dev, 0x50, 1, true, false); d->ahci.irq = pci_allocate_irq(dev); pci_register_bar(dev, ICH9_IDP_BAR, PCI_BASE_ADDRESS_SPACE_IO, @@ -135,6 +135,11 @@ static int pci_ich9_ahci_init(PCIDevice *dev) (ICH9_IDP_BAR + 0x4) | (ICH9_IDP_INDEX_LOG2 << 4)); d->ahci.idp_offset = ICH9_IDP_INDEX; + /* Although the AHCI 1.3 specification states that the first capability + * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9 + * AHCI device puts the MSI capability first, pointing to 0x80. */ + msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false); + return 0; } From 8840a843dc14adc9b6e7b5454ca64116e110a17c Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:34 -0400 Subject: [PATCH 47/59] ahci: Add test_pci_spec to ahci-test. Adds a specification adherence test for AHCI where the boot-up values for the PCI configuration space are compared against the AHCI 1.3 specification. This test does not itself attempt to engage the device. Signed-off-by: John Snow Message-id: 1408643079-30675-4-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/ahci-test.c | 305 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 299 insertions(+), 6 deletions(-) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index cc49dba30f..0674d5e189 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -25,7 +25,7 @@ #include #include #include - +#include #include #include "libqtest.h" @@ -43,15 +43,36 @@ /*** Supplementary PCI Config Space IDs & Masks ***/ #define PCI_DEVICE_ID_INTEL_Q35_AHCI (0x2922) +#define PCI_MSI_FLAGS_RESERVED (0xFF00) +#define PCI_PM_CTRL_RESERVED (0xFC) +#define PCI_BCC(REG32) ((REG32) >> 24) +#define PCI_PI(REG32) (((REG32) >> 8) & 0xFF) +#define PCI_SCC(REG32) (((REG32) >> 16) & 0xFF) + +/*** Recognized AHCI Device Types ***/ +#define AHCI_INTEL_ICH9 (PCI_DEVICE_ID_INTEL_Q35_AHCI << 16 | \ + PCI_VENDOR_ID_INTEL) /*** Globals ***/ static QGuestAllocator *guest_malloc; static QPCIBus *pcibus; static char tmp_path[] = "/tmp/qtest.XXXXXX"; +static bool ahci_pedantic; +static uint32_t ahci_fingerprint; + +/*** Macro Utilities ***/ +#define ASSERT_BIT_SET(data, mask) g_assert_cmphex((data) & (mask), ==, (mask)) +#define ASSERT_BIT_CLEAR(data, mask) g_assert_cmphex((data) & (mask), ==, 0) /*** Function Declarations ***/ static QPCIDevice *get_ahci_device(void); static void free_ahci_device(QPCIDevice *dev); +static void ahci_test_pci_spec(QPCIDevice *ahci); +static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, + uint8_t offset); +static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset); +static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset); +static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset); /*** Utilities ***/ @@ -61,7 +82,6 @@ static void free_ahci_device(QPCIDevice *dev); static QPCIDevice *get_ahci_device(void) { QPCIDevice *ahci; - uint16_t vendor_id, device_id; pcibus = qpci_init_pc(); @@ -69,11 +89,15 @@ static QPCIDevice *get_ahci_device(void) ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02)); g_assert(ahci != NULL); - vendor_id = qpci_config_readw(ahci, PCI_VENDOR_ID); - device_id = qpci_config_readw(ahci, PCI_DEVICE_ID); + ahci_fingerprint = qpci_config_readl(ahci, PCI_VENDOR_ID); - g_assert_cmphex(vendor_id, ==, PCI_VENDOR_ID_INTEL); - g_assert_cmphex(device_id, ==, PCI_DEVICE_ID_INTEL_Q35_AHCI); + switch (ahci_fingerprint) { + case AHCI_INTEL_ICH9: + break; + default: + /* Unknown device. */ + g_assert_not_reached(); + } return ahci; } @@ -145,6 +169,239 @@ static void ahci_shutdown(QPCIDevice *ahci) qtest_shutdown(); } +/*** Specification Adherence Tests ***/ + +/** + * Implementation for test_pci_spec. Ensures PCI configuration space is sane. + */ +static void ahci_test_pci_spec(QPCIDevice *ahci) +{ + uint8_t datab; + uint16_t data; + uint32_t datal; + + /* Most of these bits should start cleared until we turn them on. */ + data = qpci_config_readw(ahci, PCI_COMMAND); + ASSERT_BIT_CLEAR(data, PCI_COMMAND_MEMORY); + ASSERT_BIT_CLEAR(data, PCI_COMMAND_MASTER); + ASSERT_BIT_CLEAR(data, PCI_COMMAND_SPECIAL); /* Reserved */ + ASSERT_BIT_CLEAR(data, PCI_COMMAND_VGA_PALETTE); /* Reserved */ + ASSERT_BIT_CLEAR(data, PCI_COMMAND_PARITY); + ASSERT_BIT_CLEAR(data, PCI_COMMAND_WAIT); /* Reserved */ + ASSERT_BIT_CLEAR(data, PCI_COMMAND_SERR); + ASSERT_BIT_CLEAR(data, PCI_COMMAND_FAST_BACK); + ASSERT_BIT_CLEAR(data, PCI_COMMAND_INTX_DISABLE); + ASSERT_BIT_CLEAR(data, 0xF800); /* Reserved */ + + data = qpci_config_readw(ahci, PCI_STATUS); + ASSERT_BIT_CLEAR(data, 0x01 | 0x02 | 0x04); /* Reserved */ + ASSERT_BIT_CLEAR(data, PCI_STATUS_INTERRUPT); + ASSERT_BIT_SET(data, PCI_STATUS_CAP_LIST); /* must be set */ + ASSERT_BIT_CLEAR(data, PCI_STATUS_UDF); /* Reserved */ + ASSERT_BIT_CLEAR(data, PCI_STATUS_PARITY); + ASSERT_BIT_CLEAR(data, PCI_STATUS_SIG_TARGET_ABORT); + ASSERT_BIT_CLEAR(data, PCI_STATUS_REC_TARGET_ABORT); + ASSERT_BIT_CLEAR(data, PCI_STATUS_REC_MASTER_ABORT); + ASSERT_BIT_CLEAR(data, PCI_STATUS_SIG_SYSTEM_ERROR); + ASSERT_BIT_CLEAR(data, PCI_STATUS_DETECTED_PARITY); + + /* RID occupies the low byte, CCs occupy the high three. */ + datal = qpci_config_readl(ahci, PCI_CLASS_REVISION); + if (ahci_pedantic) { + /* AHCI 1.3 specifies that at-boot, the RID should reset to 0x00, + * Though in practice this is likely seldom true. */ + ASSERT_BIT_CLEAR(datal, 0xFF); + } + + /* BCC *must* equal 0x01. */ + g_assert_cmphex(PCI_BCC(datal), ==, 0x01); + if (PCI_SCC(datal) == 0x01) { + /* IDE */ + ASSERT_BIT_SET(0x80000000, datal); + ASSERT_BIT_CLEAR(0x60000000, datal); + } else if (PCI_SCC(datal) == 0x04) { + /* RAID */ + g_assert_cmphex(PCI_PI(datal), ==, 0); + } else if (PCI_SCC(datal) == 0x06) { + /* AHCI */ + g_assert_cmphex(PCI_PI(datal), ==, 0x01); + } else { + g_assert_not_reached(); + } + + datab = qpci_config_readb(ahci, PCI_CACHE_LINE_SIZE); + g_assert_cmphex(datab, ==, 0); + + datab = qpci_config_readb(ahci, PCI_LATENCY_TIMER); + g_assert_cmphex(datab, ==, 0); + + /* Only the bottom 7 bits must be off. */ + datab = qpci_config_readb(ahci, PCI_HEADER_TYPE); + ASSERT_BIT_CLEAR(datab, 0x7F); + + /* BIST is optional, but the low 7 bits must always start off regardless. */ + datab = qpci_config_readb(ahci, PCI_BIST); + ASSERT_BIT_CLEAR(datab, 0x7F); + + /* BARS 0-4 do not have a boot spec, but ABAR/BAR5 must be clean. */ + datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5); + g_assert_cmphex(datal, ==, 0); + + qpci_config_writel(ahci, PCI_BASE_ADDRESS_5, 0xFFFFFFFF); + datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5); + /* ABAR must be 32-bit, memory mapped, non-prefetchable and + * must be >= 512 bytes. To that end, bits 0-8 must be off. */ + ASSERT_BIT_CLEAR(datal, 0xFF); + + /* Capability list MUST be present, */ + datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST); + /* But these bits are reserved. */ + ASSERT_BIT_CLEAR(datal, ~0xFF); + g_assert_cmphex(datal, !=, 0); + + /* Check specification adherence for capability extenstions. */ + data = qpci_config_readw(ahci, datal); + + switch (ahci_fingerprint) { + case AHCI_INTEL_ICH9: + /* Intel ICH9 Family Datasheet 14.1.19 p.550 */ + g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_MSI); + break; + default: + /* AHCI 1.3, Section 2.1.14 -- CAP must point to PMCAP. */ + g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_PM); + } + + ahci_test_pci_caps(ahci, data, (uint8_t)datal); + + /* Reserved. */ + datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST + 4); + g_assert_cmphex(datal, ==, 0); + + /* IPIN might vary, but ILINE must be off. */ + datab = qpci_config_readb(ahci, PCI_INTERRUPT_LINE); + g_assert_cmphex(datab, ==, 0); +} + +/** + * Test PCI capabilities for AHCI specification adherence. + */ +static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, + uint8_t offset) +{ + uint8_t cid = header & 0xFF; + uint8_t next = header >> 8; + + g_test_message("CID: %02x; next: %02x", cid, next); + + switch (cid) { + case PCI_CAP_ID_PM: + ahci_test_pmcap(ahci, offset); + break; + case PCI_CAP_ID_MSI: + ahci_test_msicap(ahci, offset); + break; + case PCI_CAP_ID_SATA: + ahci_test_satacap(ahci, offset); + break; + + default: + g_test_message("Unknown CAP 0x%02x", cid); + } + + if (next) { + ahci_test_pci_caps(ahci, qpci_config_readw(ahci, next), next); + } +} + +/** + * Test SATA PCI capabilitity for AHCI specification adherence. + */ +static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset) +{ + uint16_t dataw; + uint32_t datal; + + g_test_message("Verifying SATACAP"); + + /* Assert that the SATACAP version is 1.0, And reserved bits are empty. */ + dataw = qpci_config_readw(ahci, offset + 2); + g_assert_cmphex(dataw, ==, 0x10); + + /* Grab the SATACR1 register. */ + datal = qpci_config_readw(ahci, offset + 4); + + switch (datal & 0x0F) { + case 0x04: /* BAR0 */ + case 0x05: /* BAR1 */ + case 0x06: + case 0x07: + case 0x08: + case 0x09: /* BAR5 */ + case 0x0F: /* Immediately following SATACR1 in PCI config space. */ + break; + default: + /* Invalid BARLOC for the Index Data Pair. */ + g_assert_not_reached(); + } + + /* Reserved. */ + g_assert_cmphex((datal >> 24), ==, 0x00); +} + +/** + * Test MSI PCI capability for AHCI specification adherence. + */ +static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset) +{ + uint16_t dataw; + uint32_t datal; + + g_test_message("Verifying MSICAP"); + + dataw = qpci_config_readw(ahci, offset + PCI_MSI_FLAGS); + ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_ENABLE); + ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_QSIZE); + ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_RESERVED); + + datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_LO); + g_assert_cmphex(datal, ==, 0); + + if (dataw & PCI_MSI_FLAGS_64BIT) { + g_test_message("MSICAP is 64bit"); + datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_HI); + g_assert_cmphex(datal, ==, 0); + dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_64); + g_assert_cmphex(dataw, ==, 0); + } else { + g_test_message("MSICAP is 32bit"); + dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_32); + g_assert_cmphex(dataw, ==, 0); + } +} + +/** + * Test Power Management PCI capability for AHCI specification adherence. + */ +static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset) +{ + uint16_t dataw; + + g_test_message("Verifying PMCAP"); + + dataw = qpci_config_readw(ahci, offset + PCI_PM_PMC); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_PME_CLOCK); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_RESERVED); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D1); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D2); + + dataw = qpci_config_readw(ahci, offset + PCI_PM_CTRL); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_STATE_MASK); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_RESERVED); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SEL_MASK); + ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SCALE_MASK); +} + /******************************************************************************/ /* Test Interfaces */ /******************************************************************************/ @@ -159,6 +416,18 @@ static void test_sanity(void) ahci_shutdown(ahci); } +/** + * Ensure that the PCI configuration space for the AHCI device is in-line with + * the AHCI 1.3 specification for initial values. + */ +static void test_pci_spec(void) +{ + QPCIDevice *ahci; + ahci = ahci_boot(); + ahci_test_pci_spec(ahci); + ahci_shutdown(ahci); +} + /******************************************************************************/ int main(int argc, char **argv) @@ -166,10 +435,33 @@ int main(int argc, char **argv) const char *arch; int fd; int ret; + int c; + + static struct option long_options[] = { + {"pedantic", no_argument, 0, 'p' }, + {0, 0, 0, 0}, + }; /* Should be first to utilize g_test functionality, So we can see errors. */ g_test_init(&argc, &argv, NULL); + while (1) { + c = getopt_long(argc, argv, "", long_options, NULL); + if (c == -1) { + break; + } + switch (c) { + case -1: + break; + case 'p': + ahci_pedantic = 1; + break; + default: + fprintf(stderr, "Unrecognized ahci_test option.\n"); + g_assert_not_reached(); + } + } + /* Check architecture */ arch = qtest_get_arch(); if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) { @@ -186,6 +478,7 @@ int main(int argc, char **argv) /* Run the tests */ qtest_add_func("/ahci/sanity", test_sanity); + qtest_add_func("/ahci/pci_spec", test_pci_spec); ret = g_test_run(); From 96d6d3bad978a4085b9560e30316c98cb6e0489c Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:35 -0400 Subject: [PATCH 48/59] ahci: add test_pci_enable to ahci-test. This adds a test wherein we engage the PCI AHCI device and ensure that the memory region for the HBA functionality is now accessible. Under Q35 environments, additional PCI configuration is performed to ensure that the HBA functionality will become usable. Signed-off-by: John Snow Message-id: 1408643079-30675-5-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/ahci-test.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++ tests/libqos/pci.c | 6 ++++++ 2 files changed, 59 insertions(+) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 0674d5e189..ee41b411ce 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -66,6 +66,7 @@ static uint32_t ahci_fingerprint; /*** Function Declarations ***/ static QPCIDevice *get_ahci_device(void); +static QPCIDevice *start_ahci_device(QPCIDevice *dev, void **hba_base); static void free_ahci_device(QPCIDevice *dev); static void ahci_test_pci_spec(QPCIDevice *ahci); static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, @@ -169,6 +170,44 @@ static void ahci_shutdown(QPCIDevice *ahci) qtest_shutdown(); } +/*** Logical Device Initialization ***/ + +/** + * Start the PCI device and sanity-check default operation. + */ +static void ahci_pci_enable(QPCIDevice *ahci, void **hba_base) +{ + uint8_t reg; + + start_ahci_device(ahci, hba_base); + + switch (ahci_fingerprint) { + case AHCI_INTEL_ICH9: + /* ICH9 has a register at PCI 0x92 that + * acts as a master port enabler mask. */ + reg = qpci_config_readb(ahci, 0x92); + reg |= 0x3F; + qpci_config_writeb(ahci, 0x92, reg); + ASSERT_BIT_SET(qpci_config_readb(ahci, 0x92), 0x3F); + break; + } + +} + +/** + * Map BAR5/ABAR, and engage the PCI device. + */ +static QPCIDevice *start_ahci_device(QPCIDevice *ahci, void **hba_base) +{ + /* Map AHCI's ABAR (BAR5) */ + *hba_base = qpci_iomap(ahci, 5, NULL); + + /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */ + qpci_device_enable(ahci); + + return ahci; +} + /*** Specification Adherence Tests ***/ /** @@ -428,6 +467,19 @@ static void test_pci_spec(void) ahci_shutdown(ahci); } +/** + * Engage the PCI AHCI device and sanity check the response. + * Perform additional PCI config space bringup for the HBA. + */ +static void test_pci_enable(void) +{ + QPCIDevice *ahci; + void *hba_base; + ahci = ahci_boot(); + ahci_pci_enable(ahci, &hba_base); + ahci_shutdown(ahci); +} + /******************************************************************************/ int main(int argc, char **argv) @@ -479,6 +531,7 @@ int main(int argc, char **argv) /* Run the tests */ qtest_add_func("/ahci/sanity", test_sanity); qtest_add_func("/ahci/pci_spec", test_pci_spec); + qtest_add_func("/ahci/pci_enable", test_pci_enable); ret = g_test_run(); diff --git a/tests/libqos/pci.c b/tests/libqos/pci.c index d5ce683d77..4e630c250a 100644 --- a/tests/libqos/pci.c +++ b/tests/libqos/pci.c @@ -71,6 +71,12 @@ void qpci_device_enable(QPCIDevice *dev) cmd = qpci_config_readw(dev, PCI_COMMAND); cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; qpci_config_writew(dev, PCI_COMMAND, cmd); + + /* Verify the bits are now set. */ + cmd = qpci_config_readw(dev, PCI_COMMAND); + g_assert_cmphex(cmd & PCI_COMMAND_IO, ==, PCI_COMMAND_IO); + g_assert_cmphex(cmd & PCI_COMMAND_MEMORY, ==, PCI_COMMAND_MEMORY); + g_assert_cmphex(cmd & PCI_COMMAND_MASTER, ==, PCI_COMMAND_MASTER); } uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id) From fac7aa7fc2ebc26803b0a7b44b010f47ce3e1dd8 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:36 -0400 Subject: [PATCH 49/59] ahci: properly shadow the TFD register In a real AHCI device, several S/ATA registers are mirrored or shadowed within the AHCI register set. These registers are not updated synchronously for each read access, but are instead updated after a Device-to-Host Register FIS packet is received. The D2H FIS contains the values from these registers on the device. In QEMU, by reaching directly into the device to grab these bits before they are "sent," we may introduce race conditions where unexpected values are present "before they are sent" which could cause issues for some guests, particularly if an attempt is made to read the PxTFD register prior to enabling the port, where incorrect values will be read. This patch also addresses the boot-time values for the PxTFD and PxSIG registers to bring them in line with the AHCI 1.3 specification. Lastly, several fields (PxTFD, PxSIG and PxSACT) are read-only, and any attempts to write to them should be ignored. Signed-off-by: John Snow Message-id: 1408643079-30675-6-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- hw/ide/ahci.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 9e1a265127..8978643fff 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -78,8 +78,7 @@ static uint32_t ahci_port_read(AHCIState *s, int port, int offset) val = pr->cmd; break; case PORT_TFDATA: - val = ((uint16_t)s->dev[port].port.ifs[0].error << 8) | - s->dev[port].port.ifs[0].status; + val = pr->tfdata; break; case PORT_SIG: val = pr->sig; @@ -251,14 +250,13 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) check_cmd(s, port); break; case PORT_TFDATA: - s->dev[port].port.ifs[0].error = (val >> 8) & 0xff; - s->dev[port].port.ifs[0].status = val & 0xff; + /* Read Only. */ break; case PORT_SIG: - pr->sig = val; + /* Read Only */ break; case PORT_SCR_STAT: - pr->scr_stat = val; + /* Read Only */ break; case PORT_SCR_CTL: if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) && @@ -497,6 +495,8 @@ static void ahci_reset_port(AHCIState *s, int port) pr->scr_stat = 0; pr->scr_err = 0; pr->scr_act = 0; + pr->tfdata = 0x7F; + pr->sig = 0xFFFFFFFF; d->busy_slot = -1; d->init_d2h_sent = false; @@ -528,16 +528,16 @@ static void ahci_reset_port(AHCIState *s, int port) s->dev[port].port_state = STATE_RUN; if (!ide_state->bs) { - s->dev[port].port_regs.sig = 0; + pr->sig = 0; ide_state->status = SEEK_STAT | WRERR_STAT; } else if (ide_state->drive_kind == IDE_CD) { - s->dev[port].port_regs.sig = SATA_SIGNATURE_CDROM; + pr->sig = SATA_SIGNATURE_CDROM; ide_state->lcyl = 0x14; ide_state->hcyl = 0xeb; DPRINTF(port, "set lcyl = %d\n", ide_state->lcyl); ide_state->status = SEEK_STAT | WRERR_STAT | READY_STAT; } else { - s->dev[port].port_regs.sig = SATA_SIGNATURE_DISK; + pr->sig = SATA_SIGNATURE_DISK; ide_state->status = SEEK_STAT | WRERR_STAT; } @@ -563,7 +563,8 @@ static void debug_print_fis(uint8_t *fis, int cmd_len) static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished) { - AHCIPortRegs *pr = &s->dev[port].port_regs; + AHCIDevice *ad = &s->dev[port]; + AHCIPortRegs *pr = &ad->port_regs; IDEState *ide_state; uint8_t *sdb_fis; @@ -572,8 +573,8 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished) return; } - sdb_fis = &s->dev[port].res_fis[RES_FIS_SDBFIS]; - ide_state = &s->dev[port].port.ifs[0]; + sdb_fis = &ad->res_fis[RES_FIS_SDBFIS]; + ide_state = &ad->port.ifs[0]; /* clear memory */ *(uint32_t*)sdb_fis = 0; @@ -582,9 +583,14 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished) sdb_fis[0] = ide_state->error; sdb_fis[2] = ide_state->status & 0x77; s->dev[port].finished |= finished; - *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(s->dev[port].finished); + *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(ad->finished); - ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_SDB_FIS); + /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */ + pr->tfdata = (ad->port.ifs[0].error << 8) | + (ad->port.ifs[0].status & 0x77) | + (pr->tfdata & 0x88); + + ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS); } static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len) @@ -642,6 +648,10 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len) pio_fis[18] = 0; pio_fis[19] = 0; + /* Update shadow registers: */ + pr->tfdata = (ad->port.ifs[0].error << 8) | + ad->port.ifs[0].status; + if (pio_fis[2] & ERR_STAT) { ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR); } @@ -693,6 +703,10 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis) d2h_fis[i] = 0; } + /* Update shadow registers: */ + pr->tfdata = (ad->port.ifs[0].error << 8) | + ad->port.ifs[0].status; + if (d2h_fis[2] & ERR_STAT) { ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR); } From c2f3029fbc5e7beb4cfb7ac264e10838fada524e Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:37 -0400 Subject: [PATCH 50/59] ahci: Add test_hba_spec to ahci-test. Add a test routine that checks the boot-up values of the HBA configuration memory space against the AHCI 1.3 specification and Intel ICH9 data sheet (for Q35 machines) for adherence and sane values. The HBA is not yet engaged or put into the idle state. [Replaced g_assert_false(...) with g_assert(!...) for glib <2.38 compatibility, reported by Peter Maydell . --Stefan] Signed-off-by: John Snow Signed-off-by: Stefan Hajnoczi Message-id: 1408643079-30675-7-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/ahci-test.c | 561 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 560 insertions(+), 1 deletion(-) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index ee41b411ce..9d5a7870d9 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -53,21 +53,242 @@ #define AHCI_INTEL_ICH9 (PCI_DEVICE_ID_INTEL_Q35_AHCI << 16 | \ PCI_VENDOR_ID_INTEL) +/*** AHCI/HBA Register Offsets and Bitmasks ***/ +#define AHCI_CAP (0) +#define AHCI_CAP_NP (0x1F) +#define AHCI_CAP_SXS (0x20) +#define AHCI_CAP_EMS (0x40) +#define AHCI_CAP_CCCS (0x80) +#define AHCI_CAP_NCS (0x1F00) +#define AHCI_CAP_PSC (0x2000) +#define AHCI_CAP_SSC (0x4000) +#define AHCI_CAP_PMD (0x8000) +#define AHCI_CAP_FBSS (0x10000) +#define AHCI_CAP_SPM (0x20000) +#define AHCI_CAP_SAM (0x40000) +#define AHCI_CAP_RESERVED (0x80000) +#define AHCI_CAP_ISS (0xF00000) +#define AHCI_CAP_SCLO (0x1000000) +#define AHCI_CAP_SAL (0x2000000) +#define AHCI_CAP_SALP (0x4000000) +#define AHCI_CAP_SSS (0x8000000) +#define AHCI_CAP_SMPS (0x10000000) +#define AHCI_CAP_SSNTF (0x20000000) +#define AHCI_CAP_SNCQ (0x40000000) +#define AHCI_CAP_S64A (0x80000000) + +#define AHCI_GHC (1) +#define AHCI_GHC_HR (0x01) +#define AHCI_GHC_IE (0x02) +#define AHCI_GHC_MRSM (0x04) +#define AHCI_GHC_RESERVED (0x7FFFFFF8) +#define AHCI_GHC_AE (0x80000000) + +#define AHCI_IS (2) +#define AHCI_PI (3) +#define AHCI_VS (4) + +#define AHCI_CCCCTL (5) +#define AHCI_CCCCTL_EN (0x01) +#define AHCI_CCCCTL_RESERVED (0x06) +#define AHCI_CCCCTL_CC (0xFF00) +#define AHCI_CCCCTL_TV (0xFFFF0000) + +#define AHCI_CCCPORTS (6) +#define AHCI_EMLOC (7) + +#define AHCI_EMCTL (8) +#define AHCI_EMCTL_STSMR (0x01) +#define AHCI_EMCTL_CTLTM (0x100) +#define AHCI_EMCTL_CTLRST (0x200) +#define AHCI_EMCTL_RESERVED (0xF0F0FCFE) + +#define AHCI_CAP2 (9) +#define AHCI_CAP2_BOH (0x01) +#define AHCI_CAP2_NVMP (0x02) +#define AHCI_CAP2_APST (0x04) +#define AHCI_CAP2_RESERVED (0xFFFFFFF8) + +#define AHCI_BOHC (10) +#define AHCI_RESERVED (11) +#define AHCI_NVMHCI (24) +#define AHCI_VENDOR (40) +#define AHCI_PORTS (64) + +/*** Port Memory Offsets & Bitmasks ***/ +#define AHCI_PX_CLB (0) +#define AHCI_PX_CLB_RESERVED (0x1FF) + +#define AHCI_PX_CLBU (1) + +#define AHCI_PX_FB (2) +#define AHCI_PX_FB_RESERVED (0xFF) + +#define AHCI_PX_FBU (3) + +#define AHCI_PX_IS (4) +#define AHCI_PX_IS_DHRS (0x1) +#define AHCI_PX_IS_PSS (0x2) +#define AHCI_PX_IS_DSS (0x4) +#define AHCI_PX_IS_SDBS (0x8) +#define AHCI_PX_IS_UFS (0x10) +#define AHCI_PX_IS_DPS (0x20) +#define AHCI_PX_IS_PCS (0x40) +#define AHCI_PX_IS_DMPS (0x80) +#define AHCI_PX_IS_RESERVED (0x23FFF00) +#define AHCI_PX_IS_PRCS (0x400000) +#define AHCI_PX_IS_IPMS (0x800000) +#define AHCI_PX_IS_OFS (0x1000000) +#define AHCI_PX_IS_INFS (0x4000000) +#define AHCI_PX_IS_IFS (0x8000000) +#define AHCI_PX_IS_HBDS (0x10000000) +#define AHCI_PX_IS_HBFS (0x20000000) +#define AHCI_PX_IS_TFES (0x40000000) +#define AHCI_PX_IS_CPDS (0x80000000) + +#define AHCI_PX_IE (5) +#define AHCI_PX_IE_DHRE (0x1) +#define AHCI_PX_IE_PSE (0x2) +#define AHCI_PX_IE_DSE (0x4) +#define AHCI_PX_IE_SDBE (0x8) +#define AHCI_PX_IE_UFE (0x10) +#define AHCI_PX_IE_DPE (0x20) +#define AHCI_PX_IE_PCE (0x40) +#define AHCI_PX_IE_DMPE (0x80) +#define AHCI_PX_IE_RESERVED (0x23FFF00) +#define AHCI_PX_IE_PRCE (0x400000) +#define AHCI_PX_IE_IPME (0x800000) +#define AHCI_PX_IE_OFE (0x1000000) +#define AHCI_PX_IE_INFE (0x4000000) +#define AHCI_PX_IE_IFE (0x8000000) +#define AHCI_PX_IE_HBDE (0x10000000) +#define AHCI_PX_IE_HBFE (0x20000000) +#define AHCI_PX_IE_TFEE (0x40000000) +#define AHCI_PX_IE_CPDE (0x80000000) + +#define AHCI_PX_CMD (6) +#define AHCI_PX_CMD_ST (0x1) +#define AHCI_PX_CMD_SUD (0x2) +#define AHCI_PX_CMD_POD (0x4) +#define AHCI_PX_CMD_CLO (0x8) +#define AHCI_PX_CMD_FRE (0x10) +#define AHCI_PX_CMD_RESERVED (0xE0) +#define AHCI_PX_CMD_CCS (0x1F00) +#define AHCI_PX_CMD_MPSS (0x2000) +#define AHCI_PX_CMD_FR (0x4000) +#define AHCI_PX_CMD_CR (0x8000) +#define AHCI_PX_CMD_CPS (0x10000) +#define AHCI_PX_CMD_PMA (0x20000) +#define AHCI_PX_CMD_HPCP (0x40000) +#define AHCI_PX_CMD_MPSP (0x80000) +#define AHCI_PX_CMD_CPD (0x100000) +#define AHCI_PX_CMD_ESP (0x200000) +#define AHCI_PX_CMD_FBSCP (0x400000) +#define AHCI_PX_CMD_APSTE (0x800000) +#define AHCI_PX_CMD_ATAPI (0x1000000) +#define AHCI_PX_CMD_DLAE (0x2000000) +#define AHCI_PX_CMD_ALPE (0x4000000) +#define AHCI_PX_CMD_ASP (0x8000000) +#define AHCI_PX_CMD_ICC (0xF0000000) + +#define AHCI_PX_RES1 (7) + +#define AHCI_PX_TFD (8) +#define AHCI_PX_TFD_STS (0xFF) +#define AHCI_PX_TFD_STS_ERR (0x01) +#define AHCI_PX_TFD_STS_CS1 (0x06) +#define AHCI_PX_TFD_STS_DRQ (0x08) +#define AHCI_PX_TFD_STS_CS2 (0x70) +#define AHCI_PX_TFD_STS_BSY (0x80) +#define AHCI_PX_TFD_ERR (0xFF00) +#define AHCI_PX_TFD_RESERVED (0xFFFF0000) + +#define AHCI_PX_SIG (9) +#define AHCI_PX_SIG_SECTOR_COUNT (0xFF) +#define AHCI_PX_SIG_LBA_LOW (0xFF00) +#define AHCI_PX_SIG_LBA_MID (0xFF0000) +#define AHCI_PX_SIG_LBA_HIGH (0xFF000000) + +#define AHCI_PX_SSTS (10) +#define AHCI_PX_SSTS_DET (0x0F) +#define AHCI_PX_SSTS_SPD (0xF0) +#define AHCI_PX_SSTS_IPM (0xF00) +#define AHCI_PX_SSTS_RESERVED (0xFFFFF000) +#define SSTS_DET_NO_DEVICE (0x00) +#define SSTS_DET_PRESENT (0x01) +#define SSTS_DET_ESTABLISHED (0x03) +#define SSTS_DET_OFFLINE (0x04) + +#define AHCI_PX_SCTL (11) + +#define AHCI_PX_SERR (12) +#define AHCI_PX_SERR_ERR (0xFFFF) +#define AHCI_PX_SERR_DIAG (0xFFFF0000) +#define AHCI_PX_SERR_DIAG_X (0x04000000) + +#define AHCI_PX_SACT (13) +#define AHCI_PX_CI (14) +#define AHCI_PX_SNTF (15) + +#define AHCI_PX_FBS (16) +#define AHCI_PX_FBS_EN (0x1) +#define AHCI_PX_FBS_DEC (0x2) +#define AHCI_PX_FBS_SDE (0x4) +#define AHCI_PX_FBS_DEV (0xF00) +#define AHCI_PX_FBS_ADO (0xF000) +#define AHCI_PX_FBS_DWE (0xF0000) +#define AHCI_PX_FBS_RESERVED (0xFFF000F8) + +#define AHCI_PX_RES2 (17) +#define AHCI_PX_VS (28) + +#define HBA_DATA_REGION_SIZE (256) +#define HBA_PORT_DATA_SIZE (128) +#define HBA_PORT_NUM_REG (HBA_PORT_DATA_SIZE/4) + +#define AHCI_VERSION_0_95 (0x00000905) +#define AHCI_VERSION_1_0 (0x00010000) +#define AHCI_VERSION_1_1 (0x00010100) +#define AHCI_VERSION_1_2 (0x00010200) +#define AHCI_VERSION_1_3 (0x00010300) + +typedef struct HBACap { + uint32_t cap; + uint32_t cap2; +} HBACap; + /*** Globals ***/ static QGuestAllocator *guest_malloc; static QPCIBus *pcibus; +static uint64_t barsize; static char tmp_path[] = "/tmp/qtest.XXXXXX"; static bool ahci_pedantic; static uint32_t ahci_fingerprint; /*** Macro Utilities ***/ +#define BITANY(data, mask) (((data) & (mask)) != 0) +#define BITSET(data, mask) (((data) & (mask)) == (mask)) +#define BITCLR(data, mask) (((data) & (mask)) == 0) #define ASSERT_BIT_SET(data, mask) g_assert_cmphex((data) & (mask), ==, (mask)) #define ASSERT_BIT_CLEAR(data, mask) g_assert_cmphex((data) & (mask), ==, 0) +/*** IO macros for the AHCI memory registers. ***/ +#define AHCI_READ(OFST) qpci_io_readl(ahci, hba_base + (OFST)) +#define AHCI_WRITE(OFST, VAL) qpci_io_writel(ahci, hba_base + (OFST), (VAL)) +#define AHCI_RREG(regno) AHCI_READ(4 * (regno)) +#define AHCI_WREG(regno, val) AHCI_WRITE(4 * (regno), (val)) + +/*** IO macros for port-specific offsets inside of AHCI memory. ***/ +#define PX_OFST(port, regno) (HBA_PORT_NUM_REG * (port) + AHCI_PORTS + (regno)) +#define PX_RREG(port, regno) AHCI_RREG(PX_OFST((port), (regno))) +#define PX_WREG(port, regno, val) AHCI_WREG(PX_OFST((port), (regno)), (val)) + /*** Function Declarations ***/ static QPCIDevice *get_ahci_device(void); static QPCIDevice *start_ahci_device(QPCIDevice *dev, void **hba_base); static void free_ahci_device(QPCIDevice *dev); +static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, + HBACap *hcap, uint8_t port); static void ahci_test_pci_spec(QPCIDevice *ahci); static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header, uint8_t offset); @@ -112,6 +333,9 @@ static void free_ahci_device(QPCIDevice *ahci) qpci_free_pc(pcibus); pcibus = NULL; } + + /* Clear our cached barsize information. */ + barsize = 0; } /*** Test Setup & Teardown ***/ @@ -200,7 +424,7 @@ static void ahci_pci_enable(QPCIDevice *ahci, void **hba_base) static QPCIDevice *start_ahci_device(QPCIDevice *ahci, void **hba_base) { /* Map AHCI's ABAR (BAR5) */ - *hba_base = qpci_iomap(ahci, 5, NULL); + *hba_base = qpci_iomap(ahci, 5, &barsize); /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */ qpci_device_enable(ahci); @@ -441,6 +665,325 @@ static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset) ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SCALE_MASK); } +static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base) +{ + HBACap hcap; + unsigned i; + uint32_t cap, cap2, reg; + uint32_t ports; + uint8_t nports_impl; + uint8_t maxports; + + g_assert(ahci != 0); + g_assert(hba_base != 0); + + /* + * Note that the AHCI spec does expect the BIOS to set up a few things: + * CAP.SSS - Support for staggered spin-up (t/f) + * CAP.SMPS - Support for mechanical presence switches (t/f) + * PI - Ports Implemented (1-32) + * PxCMD.HPCP - Hot Plug Capable Port + * PxCMD.MPSP - Mechanical Presence Switch Present + * PxCMD.CPD - Cold Presence Detection support + * + * Additional items are touched if CAP.SSS is on, see AHCI 10.1.1 p.97: + * Foreach Port Implemented: + * -PxCMD.ST, PxCMD.CR, PxCMD.FRE, PxCMD.FR, PxSCTL.DET are 0 + * -PxCLB/U and PxFB/U are set to valid regions in memory + * -PxSUD is set to 1. + * -PxSSTS.DET is polled for presence; if detected, we continue: + * -PxSERR is cleared with 1's. + * -If PxTFD.STS.BSY, PxTFD.STS.DRQ, and PxTFD.STS.ERR are all zero, + * the device is ready. + */ + + /* 1 CAP - Capabilities Register */ + cap = AHCI_RREG(AHCI_CAP); + ASSERT_BIT_CLEAR(cap, AHCI_CAP_RESERVED); + + /* 2 GHC - Global Host Control */ + reg = AHCI_RREG(AHCI_GHC); + ASSERT_BIT_CLEAR(reg, AHCI_GHC_HR); + ASSERT_BIT_CLEAR(reg, AHCI_GHC_IE); + ASSERT_BIT_CLEAR(reg, AHCI_GHC_MRSM); + if (BITSET(cap, AHCI_CAP_SAM)) { + g_test_message("Supports AHCI-Only Mode: GHC_AE is Read-Only."); + ASSERT_BIT_SET(reg, AHCI_GHC_AE); + } else { + g_test_message("Supports AHCI/Legacy mix."); + ASSERT_BIT_CLEAR(reg, AHCI_GHC_AE); + } + + /* 3 IS - Interrupt Status */ + reg = AHCI_RREG(AHCI_IS); + g_assert_cmphex(reg, ==, 0); + + /* 4 PI - Ports Implemented */ + ports = AHCI_RREG(AHCI_PI); + /* Ports Implemented must be non-zero. */ + g_assert_cmphex(ports, !=, 0); + /* Ports Implemented must be <= Number of Ports. */ + nports_impl = ctpopl(ports); + g_assert_cmpuint(((AHCI_CAP_NP & cap) + 1), >=, nports_impl); + + g_assert_cmphex(barsize, >, 0); + /* Ports must be within the proper range. Given a mapping of SIZE, + * 256 bytes are used for global HBA control, and the rest is used + * for ports data, at 0x80 bytes each. */ + maxports = (barsize - HBA_DATA_REGION_SIZE) / HBA_PORT_DATA_SIZE; + /* e.g, 30 ports for 4K of memory. (4096 - 256) / 128 = 30 */ + g_assert_cmphex((reg >> maxports), ==, 0); + + /* 5 AHCI Version */ + reg = AHCI_RREG(AHCI_VS); + switch (reg) { + case AHCI_VERSION_0_95: + case AHCI_VERSION_1_0: + case AHCI_VERSION_1_1: + case AHCI_VERSION_1_2: + case AHCI_VERSION_1_3: + break; + default: + g_assert_not_reached(); + } + + /* 6 Command Completion Coalescing Control: depends on CAP.CCCS. */ + reg = AHCI_RREG(AHCI_CCCCTL); + if (BITSET(cap, AHCI_CAP_CCCS)) { + ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_EN); + ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_RESERVED); + ASSERT_BIT_SET(reg, AHCI_CCCCTL_CC); + ASSERT_BIT_SET(reg, AHCI_CCCCTL_TV); + } else { + g_assert_cmphex(reg, ==, 0); + } + + /* 7 CCC_PORTS */ + reg = AHCI_RREG(AHCI_CCCPORTS); + /* Must be zeroes initially regardless of CAP.CCCS */ + g_assert_cmphex(reg, ==, 0); + + /* 8 EM_LOC */ + reg = AHCI_RREG(AHCI_EMLOC); + if (BITCLR(cap, AHCI_CAP_EMS)) { + g_assert_cmphex(reg, ==, 0); + } + + /* 9 EM_CTL */ + reg = AHCI_RREG(AHCI_EMCTL); + if (BITSET(cap, AHCI_CAP_EMS)) { + ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_STSMR); + ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLTM); + ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLRST); + ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_RESERVED); + } else { + g_assert_cmphex(reg, ==, 0); + } + + /* 10 CAP2 -- Capabilities Extended */ + cap2 = AHCI_RREG(AHCI_CAP2); + ASSERT_BIT_CLEAR(cap2, AHCI_CAP2_RESERVED); + + /* 11 BOHC -- Bios/OS Handoff Control */ + reg = AHCI_RREG(AHCI_BOHC); + g_assert_cmphex(reg, ==, 0); + + /* 12 -- 23: Reserved */ + g_test_message("Verifying HBA reserved area is empty."); + for (i = AHCI_RESERVED; i < AHCI_NVMHCI; ++i) { + reg = AHCI_RREG(i); + g_assert_cmphex(reg, ==, 0); + } + + /* 24 -- 39: NVMHCI */ + if (BITCLR(cap2, AHCI_CAP2_NVMP)) { + g_test_message("Verifying HBA/NVMHCI area is empty."); + for (i = AHCI_NVMHCI; i < AHCI_VENDOR; ++i) { + reg = AHCI_RREG(i); + g_assert_cmphex(reg, ==, 0); + } + } + + /* 40 -- 63: Vendor */ + g_test_message("Verifying HBA/Vendor area is empty."); + for (i = AHCI_VENDOR; i < AHCI_PORTS; ++i) { + reg = AHCI_RREG(i); + g_assert_cmphex(reg, ==, 0); + } + + /* 64 -- XX: Port Space */ + hcap.cap = cap; + hcap.cap2 = cap2; + for (i = 0; ports || (i < maxports); ports >>= 1, ++i) { + if (BITSET(ports, 0x1)) { + g_test_message("Testing port %u for spec", i); + ahci_test_port_spec(ahci, hba_base, &hcap, i); + } else { + uint16_t j; + uint16_t low = AHCI_PORTS + (32 * i); + uint16_t high = AHCI_PORTS + (32 * (i + 1)); + g_test_message("Asserting unimplemented port %u " + "(reg [%u-%u]) is empty.", + i, low, high - 1); + for (j = low; j < high; ++j) { + reg = AHCI_RREG(j); + g_assert_cmphex(reg, ==, 0); + } + } + } +} + +/** + * Test the memory space for one port for specification adherence. + */ +static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, + HBACap *hcap, uint8_t port) +{ + uint32_t reg; + unsigned i; + + /* (0) CLB */ + reg = PX_RREG(port, AHCI_PX_CLB); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CLB_RESERVED); + + /* (1) CLBU */ + if (BITCLR(hcap->cap, AHCI_CAP_S64A)) { + reg = PX_RREG(port, AHCI_PX_CLBU); + g_assert_cmphex(reg, ==, 0); + } + + /* (2) FB */ + reg = PX_RREG(port, AHCI_PX_FB); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FB_RESERVED); + + /* (3) FBU */ + if (BITCLR(hcap->cap, AHCI_CAP_S64A)) { + reg = PX_RREG(port, AHCI_PX_FBU); + g_assert_cmphex(reg, ==, 0); + } + + /* (4) IS */ + reg = PX_RREG(port, AHCI_PX_IS); + g_assert_cmphex(reg, ==, 0); + + /* (5) IE */ + reg = PX_RREG(port, AHCI_PX_IE); + g_assert_cmphex(reg, ==, 0); + + /* (6) CMD */ + reg = PX_RREG(port, AHCI_PX_CMD); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FRE); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_RESERVED); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CCS); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_PMA); /* And RW only if CAP.SPM */ + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_APSTE); /* RW only if CAP2.APST */ + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ATAPI); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_DLAE); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ALPE); /* RW only if CAP.SALP */ + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ASP); /* RW only if CAP.SALP */ + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ICC); + /* If CPDetect support does not exist, CPState must be off. */ + if (BITCLR(reg, AHCI_PX_CMD_CPD)) { + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CPS); + } + /* If MPSPresence is not set, MPSState must be off. */ + if (BITCLR(reg, AHCI_PX_CMD_MPSP)) { + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS); + } + /* If we do not support MPS, MPSS and MPSP must be off. */ + if (BITCLR(hcap->cap, AHCI_CAP_SMPS)) { + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSP); + } + /* If, via CPD or MPSP we detect a drive, HPCP must be on. */ + if (BITANY(reg, AHCI_PX_CMD_CPD || AHCI_PX_CMD_MPSP)) { + ASSERT_BIT_SET(reg, AHCI_PX_CMD_HPCP); + } + /* HPCP and ESP cannot both be active. */ + g_assert(!BITSET(reg, AHCI_PX_CMD_HPCP | AHCI_PX_CMD_ESP)); + /* If CAP.FBSS is not set, FBSCP must not be set. */ + if (BITCLR(hcap->cap, AHCI_CAP_FBSS)) { + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FBSCP); + } + + /* (7) RESERVED */ + reg = PX_RREG(port, AHCI_PX_RES1); + g_assert_cmphex(reg, ==, 0); + + /* (8) TFD */ + reg = PX_RREG(port, AHCI_PX_TFD); + /* At boot, prior to an FIS being received, the TFD register should be 0x7F, + * which breaks down as follows, as seen in AHCI 1.3 sec 3.3.8, p. 27. */ + ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_ERR); + ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_CS1); + ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_DRQ); + ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_CS2); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_BSY); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_RESERVED); + + /* (9) SIG */ + /* Though AHCI specifies the boot value should be 0xFFFFFFFF, + * Even when GHC.ST is zero, the AHCI HBA may receive the initial + * D2H register FIS and update the signature asynchronously, + * so we cannot expect a value here. AHCI 1.3, sec 3.3.9, pp 27-28 */ + + /* (10) SSTS / SCR0: SStatus */ + reg = PX_RREG(port, AHCI_PX_SSTS); + ASSERT_BIT_CLEAR(reg, AHCI_PX_SSTS_RESERVED); + /* Even though the register should be 0 at boot, it is asynchronous and + * prone to change, so we cannot test any well known value. */ + + /* (11) SCTL / SCR2: SControl */ + reg = PX_RREG(port, AHCI_PX_SCTL); + g_assert_cmphex(reg, ==, 0); + + /* (12) SERR / SCR1: SError */ + reg = PX_RREG(port, AHCI_PX_SERR); + g_assert_cmphex(reg, ==, 0); + + /* (13) SACT / SCR3: SActive */ + reg = PX_RREG(port, AHCI_PX_SACT); + g_assert_cmphex(reg, ==, 0); + + /* (14) CI */ + reg = PX_RREG(port, AHCI_PX_CI); + g_assert_cmphex(reg, ==, 0); + + /* (15) SNTF */ + reg = PX_RREG(port, AHCI_PX_SNTF); + g_assert_cmphex(reg, ==, 0); + + /* (16) FBS */ + reg = PX_RREG(port, AHCI_PX_FBS); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_EN); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEC); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_SDE); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEV); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DWE); + ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_RESERVED); + if (BITSET(hcap->cap, AHCI_CAP_FBSS)) { + /* if Port-Multiplier FIS-based switching avail, ADO must >= 2 */ + g_assert((reg & AHCI_PX_FBS_ADO) >> ctzl(AHCI_PX_FBS_ADO) >= 2); + } + + /* [17 -- 27] RESERVED */ + for (i = AHCI_PX_RES2; i < AHCI_PX_VS; ++i) { + reg = PX_RREG(port, i); + g_assert_cmphex(reg, ==, 0); + } + + /* [28 -- 31] Vendor-Specific */ + for (i = AHCI_PX_VS; i < 32; ++i) { + reg = PX_RREG(port, i); + if (reg) { + g_test_message("INFO: Vendor register %u non-empty", i); + } + } +} + /******************************************************************************/ /* Test Interfaces */ /******************************************************************************/ @@ -480,6 +1023,21 @@ static void test_pci_enable(void) ahci_shutdown(ahci); } +/** + * Investigate the memory mapped regions of the HBA, + * and test them for AHCI specification adherence. + */ +static void test_hba_spec(void) +{ + QPCIDevice *ahci; + void *hba_base; + + ahci = ahci_boot(); + ahci_pci_enable(ahci, &hba_base); + ahci_test_hba_spec(ahci, hba_base); + ahci_shutdown(ahci); +} + /******************************************************************************/ int main(int argc, char **argv) @@ -532,6 +1090,7 @@ int main(int argc, char **argv) qtest_add_func("/ahci/sanity", test_sanity); qtest_add_func("/ahci/pci_spec", test_pci_spec); qtest_add_func("/ahci/pci_enable", test_pci_enable); + qtest_add_func("/ahci/hba_spec", test_hba_spec); ret = g_test_run(); From dbc180e5725e4eb4fd1632d0af41189c7410e459 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:38 -0400 Subject: [PATCH 51/59] ahci: Add test_hba_enable to ahci-test. This test engages the HBA functionality and initializes values to sane defaults to allow for minimal HBA functionality. Buffers are allocated and pointers are updated to allow minimal I/O commands to complete as expected. Error registers and responses are sanity checked for specification adherence. Signed-off-by: John Snow Message-id: 1408643079-30675-8-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/ahci-test.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 9d5a7870d9..9cd6faf0e8 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -277,11 +277,17 @@ static uint32_t ahci_fingerprint; #define AHCI_WRITE(OFST, VAL) qpci_io_writel(ahci, hba_base + (OFST), (VAL)) #define AHCI_RREG(regno) AHCI_READ(4 * (regno)) #define AHCI_WREG(regno, val) AHCI_WRITE(4 * (regno), (val)) +#define AHCI_SET(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) | (mask)) +#define AHCI_CLR(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) & ~(mask)) /*** IO macros for port-specific offsets inside of AHCI memory. ***/ #define PX_OFST(port, regno) (HBA_PORT_NUM_REG * (port) + AHCI_PORTS + (regno)) #define PX_RREG(port, regno) AHCI_RREG(PX_OFST((port), (regno))) #define PX_WREG(port, regno, val) AHCI_WREG(PX_OFST((port), (regno)), (val)) +#define PX_SET(port, reg, mask) PX_WREG((port), (reg), \ + PX_RREG((port), (reg)) | (mask)); +#define PX_CLR(port, reg, mask) PX_WREG((port), (reg), \ + PX_RREG((port), (reg)) & ~(mask)); /*** Function Declarations ***/ static QPCIDevice *get_ahci_device(void); @@ -432,6 +438,140 @@ static QPCIDevice *start_ahci_device(QPCIDevice *ahci, void **hba_base) return ahci; } +/** + * Test and initialize the AHCI's HBA memory areas. + * Initialize and start any ports with devices attached. + * Bring the HBA into the idle state. + */ +static void ahci_hba_enable(QPCIDevice *ahci, void *hba_base) +{ + /* Bits of interest in this section: + * GHC.AE Global Host Control / AHCI Enable + * PxCMD.ST Port Command: Start + * PxCMD.SUD "Spin Up Device" + * PxCMD.POD "Power On Device" + * PxCMD.FRE "FIS Receive Enable" + * PxCMD.FR "FIS Receive Running" + * PxCMD.CR "Command List Running" + */ + + g_assert(ahci != NULL); + g_assert(hba_base != NULL); + + uint32_t reg, ports_impl, clb, fb; + uint16_t i; + uint8_t num_cmd_slots; + + g_assert(hba_base != 0); + + /* Set GHC.AE to 1 */ + AHCI_SET(AHCI_GHC, AHCI_GHC_AE); + reg = AHCI_RREG(AHCI_GHC); + ASSERT_BIT_SET(reg, AHCI_GHC_AE); + + /* Read CAP.NCS, how many command slots do we have? */ + reg = AHCI_RREG(AHCI_CAP); + num_cmd_slots = ((reg & AHCI_CAP_NCS) >> ctzl(AHCI_CAP_NCS)) + 1; + g_test_message("Number of Command Slots: %u", num_cmd_slots); + + /* Determine which ports are implemented. */ + ports_impl = AHCI_RREG(AHCI_PI); + + for (i = 0; ports_impl; ports_impl >>= 1, ++i) { + if (!(ports_impl & 0x01)) { + continue; + } + + g_test_message("Initializing port %u", i); + + reg = PX_RREG(i, AHCI_PX_CMD); + if (BITCLR(reg, AHCI_PX_CMD_ST | AHCI_PX_CMD_CR | + AHCI_PX_CMD_FRE | AHCI_PX_CMD_FR)) { + g_test_message("port is idle"); + } else { + g_test_message("port needs to be idled"); + PX_CLR(i, AHCI_PX_CMD, (AHCI_PX_CMD_ST | AHCI_PX_CMD_FRE)); + /* The port has 500ms to disengage. */ + usleep(500000); + reg = PX_RREG(i, AHCI_PX_CMD); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR); + g_test_message("port is now idle"); + /* The spec does allow for possibly needing a PORT RESET + * or HBA reset if we fail to idle the port. */ + } + + /* Allocate Memory for the Command List Buffer & FIS Buffer */ + /* PxCLB space ... 0x20 per command, as in 4.2.2 p 36 */ + clb = guest_alloc(guest_malloc, num_cmd_slots * 0x20); + g_test_message("CLB: 0x%08x", clb); + PX_WREG(i, AHCI_PX_CLB, clb); + g_assert_cmphex(clb, ==, PX_RREG(i, AHCI_PX_CLB)); + + /* PxFB space ... 0x100, as in 4.2.1 p 35 */ + fb = guest_alloc(guest_malloc, 0x100); + g_test_message("FB: 0x%08x", fb); + PX_WREG(i, AHCI_PX_FB, fb); + g_assert_cmphex(fb, ==, PX_RREG(i, AHCI_PX_FB)); + + /* Clear PxSERR, PxIS, then IS.IPS[x] by writing '1's. */ + PX_WREG(i, AHCI_PX_SERR, 0xFFFFFFFF); + PX_WREG(i, AHCI_PX_IS, 0xFFFFFFFF); + AHCI_WREG(AHCI_IS, (1 << i)); + + /* Verify Interrupts Cleared */ + reg = PX_RREG(i, AHCI_PX_SERR); + g_assert_cmphex(reg, ==, 0); + + reg = PX_RREG(i, AHCI_PX_IS); + g_assert_cmphex(reg, ==, 0); + + reg = AHCI_RREG(AHCI_IS); + ASSERT_BIT_CLEAR(reg, (1 << i)); + + /* Enable All Interrupts: */ + PX_WREG(i, AHCI_PX_IE, 0xFFFFFFFF); + reg = PX_RREG(i, AHCI_PX_IE); + g_assert_cmphex(reg, ==, ~((uint32_t)AHCI_PX_IE_RESERVED)); + + /* Enable the FIS Receive Engine. */ + PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_FRE); + reg = PX_RREG(i, AHCI_PX_CMD); + ASSERT_BIT_SET(reg, AHCI_PX_CMD_FR); + + /* AHCI 1.3 spec: if !STS.BSY, !STS.DRQ and PxSSTS.DET indicates + * physical presence, a device is present and may be started. However, + * PxSERR.DIAG.X /may/ need to be cleared a priori. */ + reg = PX_RREG(i, AHCI_PX_SERR); + if (BITSET(reg, AHCI_PX_SERR_DIAG_X)) { + PX_SET(i, AHCI_PX_SERR, AHCI_PX_SERR_DIAG_X); + } + + reg = PX_RREG(i, AHCI_PX_TFD); + if (BITCLR(reg, AHCI_PX_TFD_STS_BSY | AHCI_PX_TFD_STS_DRQ)) { + reg = PX_RREG(i, AHCI_PX_SSTS); + if ((reg & AHCI_PX_SSTS_DET) == SSTS_DET_ESTABLISHED) { + /* Device Found: set PxCMD.ST := 1 */ + PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_ST); + ASSERT_BIT_SET(PX_RREG(i, AHCI_PX_CMD), AHCI_PX_CMD_CR); + g_test_message("Started Device %u", i); + } else if ((reg & AHCI_PX_SSTS_DET)) { + /* Device present, but in some unknown state. */ + g_assert_not_reached(); + } + } + } + + /* Enable GHC.IE */ + AHCI_SET(AHCI_GHC, AHCI_GHC_IE); + reg = AHCI_RREG(AHCI_GHC); + ASSERT_BIT_SET(reg, AHCI_GHC_IE); + + /* TODO: The device should now be idling and waiting for commands. + * In the future, a small test-case to inspect the Register D2H FIS + * and clear the initial interrupts might be good. */ +} + /*** Specification Adherence Tests ***/ /** @@ -1038,6 +1178,21 @@ static void test_hba_spec(void) ahci_shutdown(ahci); } +/** + * Engage the HBA functionality of the AHCI PCI device, + * and bring it into a functional idle state. + */ +static void test_hba_enable(void) +{ + QPCIDevice *ahci; + void *hba_base; + + ahci = ahci_boot(); + ahci_pci_enable(ahci, &hba_base); + ahci_hba_enable(ahci, hba_base); + ahci_shutdown(ahci); +} + /******************************************************************************/ int main(int argc, char **argv) @@ -1091,6 +1246,7 @@ int main(int argc, char **argv) qtest_add_func("/ahci/pci_spec", test_pci_spec); qtest_add_func("/ahci/pci_enable", test_pci_enable); qtest_add_func("/ahci/hba_spec", test_hba_spec); + qtest_add_func("/ahci/hba_enable", test_hba_enable); ret = g_test_run(); From 0fa781e3d50ec2e7b45dda96709b16b38f88799b Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 21 Aug 2014 13:44:39 -0400 Subject: [PATCH 52/59] ahci: Add test_identify case to ahci-test. Utilizing all of the bring-up code in pci_enable and hba_enable, this test issues a simple IDENTIFY command via the HBA and retrieves the response via the PIO receive mechanisms of the HBA. Bugs: The DPS interrupt (Descriptor Processed Status) does not currently get set. This will need to be adjusted in a future patch series when the AHCI DMA pathways are reworked to allow the feature, which may be utilized by OSX guests. Signed-off-by: John Snow Message-id: 1408643079-30675-9-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/ahci-test.c | 304 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 9cd6faf0e8..4c77ebec7a 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -252,6 +252,97 @@ #define AHCI_VERSION_1_2 (0x00010200) #define AHCI_VERSION_1_3 (0x00010300) +/*** Structures ***/ + +/** + * Generic FIS structure. + */ +typedef struct FIS { + uint8_t fis_type; + uint8_t flags; + char data[0]; +} __attribute__((__packed__)) FIS; + +/** + * Register device-to-host FIS structure. + */ +typedef struct RegD2HFIS { + /* DW0 */ + uint8_t fis_type; + uint8_t flags; + uint8_t status; + uint8_t error; + /* DW1 */ + uint8_t lba_low; + uint8_t lba_mid; + uint8_t lba_high; + uint8_t device; + /* DW2 */ + uint8_t lba3; + uint8_t lba4; + uint8_t lba5; + uint8_t res1; + /* DW3 */ + uint16_t count; + uint8_t res2; + uint8_t res3; + /* DW4 */ + uint16_t res4; + uint16_t res5; +} __attribute__((__packed__)) RegD2HFIS; + +/** + * Register host-to-device FIS structure. + */ +typedef struct RegH2DFIS { + /* DW0 */ + uint8_t fis_type; + uint8_t flags; + uint8_t command; + uint8_t feature_low; + /* DW1 */ + uint8_t lba_low; + uint8_t lba_mid; + uint8_t lba_high; + uint8_t device; + /* DW2 */ + uint8_t lba3; + uint8_t lba4; + uint8_t lba5; + uint8_t feature_high; + /* DW3 */ + uint16_t count; + uint8_t icc; + uint8_t control; + /* DW4 */ + uint32_t aux; +} __attribute__((__packed__)) RegH2DFIS; + +/** + * Command List entry structure. + * The command list contains between 1-32 of these structures. + */ +typedef struct AHCICommand { + uint8_t b1; + uint8_t b2; + uint16_t prdtl; /* Phys Region Desc. Table Length */ + uint32_t prdbc; /* Phys Region Desc. Byte Count */ + uint32_t ctba; /* Command Table Descriptor Base Address */ + uint32_t ctbau; /* '' Upper */ + uint32_t res[4]; +} __attribute__((__packed__)) AHCICommand; + +/** + * Physical Region Descriptor; pointed to by the Command List Header, + * struct ahci_command. + */ +typedef struct PRD { + uint32_t dba; /* Data Base Address */ + uint32_t dbau; /* Data Base Address Upper */ + uint32_t res; /* Reserved */ + uint32_t dbc; /* Data Byte Count (0-indexed) & Interrupt Flag (bit 2^31) */ +} PRD; + typedef struct HBACap { uint32_t cap; uint32_t cap2; @@ -289,6 +380,10 @@ static uint32_t ahci_fingerprint; #define PX_CLR(port, reg, mask) PX_WREG((port), (reg), \ PX_RREG((port), (reg)) & ~(mask)); +/* For calculating how big the PRD table needs to be: */ +#define CMD_TBL_SIZ(n) ((0x80 + ((n) * sizeof(PRD)) + 0x7F) & ~0x7F) + + /*** Function Declarations ***/ static QPCIDevice *get_ahci_device(void); static QPCIDevice *start_ahci_device(QPCIDevice *dev, void **hba_base); @@ -304,6 +399,17 @@ static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset); /*** Utilities ***/ +static void string_bswap16(uint16_t *s, size_t bytes) +{ + g_assert_cmphex((bytes & 1), ==, 0); + bytes /= 2; + + while (bytes--) { + *s = bswap16(*s); + s++; + } +} + /** * Locate, verify, and return a handle to the AHCI device. */ @@ -418,6 +524,7 @@ static void ahci_pci_enable(QPCIDevice *ahci, void **hba_base) reg = qpci_config_readb(ahci, 0x92); reg |= 0x3F; qpci_config_writeb(ahci, 0x92, reg); + /* 0...0111111b -- bit significant, ports 0-5 enabled. */ ASSERT_BIT_SET(qpci_config_readb(ahci, 0x92), 0x3F); break; } @@ -1124,6 +1231,186 @@ static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base, } } +/** + * Utilizing an initialized AHCI HBA, issue an IDENTIFY command to the first + * device we see, then read and check the response. + */ +static void ahci_test_identify(QPCIDevice *ahci, void *hba_base) +{ + RegD2HFIS *d2h = g_malloc0(0x20); + RegD2HFIS *pio = g_malloc0(0x20); + RegH2DFIS fis; + AHCICommand cmd; + PRD prd; + uint32_t ports, reg, clb, table, fb, data_ptr; + uint16_t buff[256]; + unsigned i; + int rc; + + g_assert(ahci != NULL); + g_assert(hba_base != NULL); + + /* We need to: + * (1) Create a Command Table Buffer and update the Command List Slot #0 + * to point to this buffer. + * (2) Construct an FIS host-to-device command structure, and write it to + * the top of the command table buffer. + * (3) Create a data buffer for the IDENTIFY response to be sent to + * (4) Create a Physical Region Descriptor that points to the data buffer, + * and write it to the bottom (offset 0x80) of the command table. + * (5) Now, PxCLB points to the command list, command 0 points to + * our table, and our table contains an FIS instruction and a + * PRD that points to our rx buffer. + * (6) We inform the HBA via PxCI that there is a command ready in slot #0. + */ + + /* Pick the first implemented and running port */ + ports = AHCI_RREG(AHCI_PI); + for (i = 0; i < 32; ports >>= 1, ++i) { + if (ports == 0) { + i = 32; + } + + if (!(ports & 0x01)) { + continue; + } + + reg = PX_RREG(i, AHCI_PX_CMD); + if (BITSET(reg, AHCI_PX_CMD_ST)) { + break; + } + } + g_assert_cmphex(i, <, 32); + g_test_message("Selected port %u for test", i); + + /* Clear out this port's interrupts (ignore the init register d2h fis) */ + reg = PX_RREG(i, AHCI_PX_IS); + PX_WREG(i, AHCI_PX_IS, reg); + g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); + + /* Wipe the FIS-Recieve Buffer */ + fb = PX_RREG(i, AHCI_PX_FB); + g_assert_cmphex(fb, !=, 0); + qmemset(fb, 0x00, 0x100); + + /* Create a Command Table buffer. 0x80 is the smallest with a PRDTL of 0. */ + /* We need at least one PRD, so round up to the nearest 0x80 multiple. */ + table = guest_alloc(guest_malloc, CMD_TBL_SIZ(1)); + g_assert(table); + ASSERT_BIT_CLEAR(table, 0x7F); + + /* Create a data buffer ... where we will dump the IDENTIFY data to. */ + data_ptr = guest_alloc(guest_malloc, 512); + g_assert(data_ptr); + + /* Grab the Command List Buffer pointer */ + clb = PX_RREG(i, AHCI_PX_CLB); + g_assert(clb); + + /* Copy the existing Command #0 structure from the CLB into local memory, + * and build a new command #0. */ + memread(clb, &cmd, sizeof(cmd)); + cmd.b1 = 5; /* reg_h2d_fis is 5 double-words long */ + cmd.b2 = 0x04; /* clear PxTFD.STS.BSY when done */ + cmd.prdtl = cpu_to_le16(1); /* One PRD table entry. */ + cmd.prdbc = 0; + cmd.ctba = cpu_to_le32(table); + cmd.ctbau = 0; + + /* Construct our PRD, noting that DBC is 0-indexed. */ + prd.dba = cpu_to_le32(data_ptr); + prd.dbau = 0; + prd.res = 0; + /* 511+1 bytes, request DPS interrupt */ + prd.dbc = cpu_to_le32(511 | 0x80000000); + + /* Construct our Command FIS, Based on http://wiki.osdev.org/AHCI */ + memset(&fis, 0x00, sizeof(fis)); + fis.fis_type = 0x27; /* Register Host-to-Device FIS */ + fis.command = 0xEC; /* IDENTIFY */ + fis.device = 0; + fis.flags = 0x80; /* Indicate this is a command FIS */ + + /* We've committed nothing yet, no interrupts should be posted yet. */ + g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); + + /* Commit the Command FIS to the Command Table */ + memwrite(table, &fis, sizeof(fis)); + + /* Commit the PRD entry to the Command Table */ + memwrite(table + 0x80, &prd, sizeof(prd)); + + /* Commit Command #0, pointing to the Table, to the Command List Buffer. */ + memwrite(clb, &cmd, sizeof(cmd)); + + /* Everything is in place, but we haven't given the go-ahead yet. */ + g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); + + /* Issue Command #0 via PxCI */ + PX_WREG(i, AHCI_PX_CI, (1 << 0)); + while (BITSET(PX_RREG(i, AHCI_PX_TFD), AHCI_PX_TFD_STS_BSY)) { + usleep(50); + } + + /* Check for expected interrupts */ + reg = PX_RREG(i, AHCI_PX_IS); + ASSERT_BIT_SET(reg, AHCI_PX_IS_DHRS); + ASSERT_BIT_SET(reg, AHCI_PX_IS_PSS); + /* BUG: we expect AHCI_PX_IS_DPS to be set. */ + ASSERT_BIT_CLEAR(reg, AHCI_PX_IS_DPS); + + /* Clear expected interrupts and assert all interrupts now cleared. */ + PX_WREG(i, AHCI_PX_IS, AHCI_PX_IS_DHRS | AHCI_PX_IS_PSS | AHCI_PX_IS_DPS); + g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0); + + /* Check for errors. */ + reg = PX_RREG(i, AHCI_PX_SERR); + g_assert_cmphex(reg, ==, 0); + reg = PX_RREG(i, AHCI_PX_TFD); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR); + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR); + + /* Investigate CMD #0, assert that we read 512 bytes */ + memread(clb, &cmd, sizeof(cmd)); + g_assert_cmphex(512, ==, le32_to_cpu(cmd.prdbc)); + + /* Investigate FIS responses */ + memread(fb + 0x20, pio, 0x20); + memread(fb + 0x40, d2h, 0x20); + g_assert_cmphex(pio->fis_type, ==, 0x5f); + g_assert_cmphex(d2h->fis_type, ==, 0x34); + g_assert_cmphex(pio->flags, ==, d2h->flags); + g_assert_cmphex(pio->status, ==, d2h->status); + g_assert_cmphex(pio->error, ==, d2h->error); + + reg = PX_RREG(i, AHCI_PX_TFD); + g_assert_cmphex((reg & AHCI_PX_TFD_ERR), ==, pio->error); + g_assert_cmphex((reg & AHCI_PX_TFD_STS), ==, pio->status); + /* The PIO Setup FIS contains a "bytes read" field, which is a + * 16-bit value. The Physical Region Descriptor Byte Count is + * 32-bit, but for small transfers using one PRD, it should match. */ + g_assert_cmphex(le16_to_cpu(pio->res4), ==, le32_to_cpu(cmd.prdbc)); + + /* Last, but not least: Investigate the IDENTIFY response data. */ + memread(data_ptr, &buff, 512); + + /* Check serial number/version in the buffer */ + /* NB: IDENTIFY strings are packed in 16bit little endian chunks. + * Since we copy byte-for-byte in ahci-test, on both LE and BE, we need to + * unchunk this data. By contrast, ide-test copies 2 bytes at a time, and + * as a consequence, only needs to unchunk the data on LE machines. */ + string_bswap16(&buff[10], 20); + rc = memcmp(&buff[10], "testdisk ", 20); + g_assert_cmphex(rc, ==, 0); + + string_bswap16(&buff[23], 8); + rc = memcmp(&buff[23], "version ", 8); + g_assert_cmphex(rc, ==, 0); + + g_free(d2h); + g_free(pio); +} + /******************************************************************************/ /* Test Interfaces */ /******************************************************************************/ @@ -1193,6 +1480,22 @@ static void test_hba_enable(void) ahci_shutdown(ahci); } +/** + * Bring up the device and issue an IDENTIFY command. + * Inspect the state of the HBA device and the data returned. + */ +static void test_identify(void) +{ + QPCIDevice *ahci; + void *hba_base; + + ahci = ahci_boot(); + ahci_pci_enable(ahci, &hba_base); + ahci_hba_enable(ahci, hba_base); + ahci_test_identify(ahci, hba_base); + ahci_shutdown(ahci); +} + /******************************************************************************/ int main(int argc, char **argv) @@ -1247,6 +1550,7 @@ int main(int argc, char **argv) qtest_add_func("/ahci/pci_enable", test_pci_enable); qtest_add_func("/ahci/hba_spec", test_hba_spec); qtest_add_func("/ahci/hba_enable", test_hba_enable); + qtest_add_func("/ahci/identify", test_identify); ret = g_test_run(); From 3e7e6f186f6180c5900dbc8db04abbbda1275dad Mon Sep 17 00:00:00 2001 From: Chrysostomos Nanakos Date: Tue, 16 Sep 2014 12:17:11 +0300 Subject: [PATCH 53/59] block/archipelago: Fix typo in qemu_archipelago_truncate() Fix a typo introduced by 94c80a438c85f2c19698547fbb115ea46d80c5f1 Signed-off-by: Chrysostomos Nanakos Signed-off-by: Stefan Hajnoczi --- block/archipelago.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/archipelago.c b/block/archipelago.c index 3c86d0ba5a..73d91a422f 100644 --- a/block/archipelago.c +++ b/block/archipelago.c @@ -993,7 +993,7 @@ static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset) req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC); if (!req) { archipelagolog("Cannot get XSEG request\n"); - return err_exit2; + goto err_exit2; } ret = xseg_prep_request(s->xseg, req, targetlen, 0); From 550830f9351291c585c963204ad9127998b1c1ce Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 16 Sep 2014 15:24:24 +0100 Subject: [PATCH 54/59] block: delete cow block driver This patch removes support for the cow file format. Normally we do not break backwards compatibility but in this case there is no impact and it is the most logical option. Extraordinary claims require extraordinary evidence so I will show why removing the cow block driver is the right thing to do. The cow file format is the disk image format for Usermode Linux, a way of running a Linux system in userspace. The performance of UML was never great and it was hacky, but it enjoyed some popularity before hardware virtualization support became mainstream. QEMU's block/cow.c is supposed to read this image file format. Unfortunately the file format was underspecified: 1. Earlier Linux versions used the MAXPATHLEN constant for the backing filename field. The value of MAXPATHLEN can change, so Linux switched to a 4096 literal but QEMU has a 1024 literal. 2. Padding was not used on the header struct (both in the Linux kernel and in QEMU) so the struct layout varied across architectures. In particular, i386 and x86_64 were different due to int64_t alignment differences. Linux now uses __attribute__((packed)), QEMU does not. Therefore: 1. QEMU cow images do not conform to the Linux cow image file format. 2. cow images cannot be shared between different host architectures. This means QEMU cow images are useless and QEMU has not had bug reports from users actually hitting these issues. Let's get rid of this thing, it serves no purpose and no one will be affected. Signed-off-by: Stefan Hajnoczi Reviewed-by: Markus Armbruster Message-id: 1410877464-20481-1-git-send-email-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi --- block/Makefile.objs | 2 +- block/cow.c | 433 ----------------------------------- qapi/block-core.json | 5 +- qemu-doc.texi | 9 - qemu-img.texi | 4 +- qmp-commands.hx | 2 +- tests/image-fuzzer/runner.py | 5 +- tests/qemu-iotests/069 | 2 +- tests/qemu-iotests/072 | 2 +- tests/qemu-iotests/099 | 2 +- tests/qemu-iotests/common | 6 - ui/cocoa.m | 2 +- 12 files changed, 12 insertions(+), 462 deletions(-) delete mode 100644 block/cow.c diff --git a/block/Makefile.objs b/block/Makefile.objs index d6e23a25de..a833ed5745 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,4 +1,4 @@ -block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o +block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o diff --git a/block/cow.c b/block/cow.c deleted file mode 100644 index c3769fe03b..0000000000 --- a/block/cow.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Block driver for the COW format - * - * Copyright (c) 2004 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block/block_int.h" -#include "qemu/module.h" - -/**************************************************************/ -/* COW block driver using file system holes */ - -/* user mode linux compatible COW file */ -#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -#define COW_VERSION 2 - -struct cow_header_v2 { - uint32_t magic; - uint32_t version; - char backing_file[1024]; - int32_t mtime; - uint64_t size; - uint32_t sectorsize; -}; - -typedef struct BDRVCowState { - CoMutex lock; - int64_t cow_sectors_offset; -} BDRVCowState; - -static int cow_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const struct cow_header_v2 *cow_header = (const void *)buf; - - if (buf_size >= sizeof(struct cow_header_v2) && - be32_to_cpu(cow_header->magic) == COW_MAGIC && - be32_to_cpu(cow_header->version) == COW_VERSION) - return 100; - else - return 0; -} - -static int cow_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) -{ - BDRVCowState *s = bs->opaque; - struct cow_header_v2 cow_header; - int bitmap_size; - int64_t size; - int ret; - - /* see if it is a cow image */ - ret = bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header)); - if (ret < 0) { - goto fail; - } - - if (be32_to_cpu(cow_header.magic) != COW_MAGIC) { - error_setg(errp, "Image not in COW format"); - ret = -EINVAL; - goto fail; - } - - if (be32_to_cpu(cow_header.version) != COW_VERSION) { - char version[64]; - snprintf(version, sizeof(version), - "COW version %" PRIu32, cow_header.version); - error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bs->device_name, "cow", version); - ret = -ENOTSUP; - goto fail; - } - - /* cow image found */ - size = be64_to_cpu(cow_header.size); - bs->total_sectors = size / 512; - - pstrcpy(bs->backing_file, sizeof(bs->backing_file), - cow_header.backing_file); - - bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header); - s->cow_sectors_offset = (bitmap_size + 511) & ~511; - qemu_co_mutex_init(&s->lock); - return 0; - fail: - return ret; -} - -static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors) -{ - int64_t bitnum = start, last = start + nb_sectors; - while (bitnum < last) { - if ((bitnum & 7) == 0 && bitnum + 8 <= last) { - bitmap[bitnum / 8] = 0xFF; - bitnum += 8; - continue; - } - bitmap[bitnum/8] |= (1 << (bitnum % 8)); - bitnum++; - } -} - -#define BITS_PER_BITMAP_SECTOR (512 * 8) - -/* Cannot use bitmap.c on big-endian machines. */ -static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap) -{ - return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0; -} - -static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors) -{ - int streak_value = value ? 0xFF : 0; - int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR); - int bitnum = start; - while (bitnum < last) { - if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) { - bitnum += 8; - continue; - } - if (cow_test_bit(bitnum, bitmap) == value) { - bitnum++; - continue; - } - break; - } - return MIN(bitnum, last) - start; -} - -/* Return true if first block has been changed (ie. current version is - * in COW file). Set the number of continuous blocks for which that - * is true. */ -static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *num_same) -{ - int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8; - uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE; - bool first = true; - int changed = 0, same = 0; - - do { - int ret; - uint8_t bitmap[BDRV_SECTOR_SIZE]; - - bitnum &= BITS_PER_BITMAP_SECTOR - 1; - int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum); - - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; - } - - if (first) { - changed = cow_test_bit(bitnum, bitmap); - first = false; - } - - same += cow_find_streak(bitmap, changed, bitnum, nb_sectors); - - bitnum += sector_bits; - nb_sectors -= sector_bits; - offset += BDRV_SECTOR_SIZE; - } while (nb_sectors); - - *num_same = same; - return changed; -} - -static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *num_same) -{ - BDRVCowState *s = bs->opaque; - int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same); - int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS); - if (ret < 0) { - return ret; - } - return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID; -} - -static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) -{ - int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8; - uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE; - bool first = true; - int sector_bits; - - for ( ; nb_sectors; - bitnum += sector_bits, - nb_sectors -= sector_bits, - offset += BDRV_SECTOR_SIZE) { - int ret, set; - uint8_t bitmap[BDRV_SECTOR_SIZE]; - - bitnum &= BITS_PER_BITMAP_SECTOR - 1; - sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum); - - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; - } - - /* Skip over any already set bits */ - set = cow_find_streak(bitmap, 1, bitnum, sector_bits); - bitnum += set; - sector_bits -= set; - nb_sectors -= set; - if (!sector_bits) { - continue; - } - - if (first) { - ret = bdrv_flush(bs->file); - if (ret < 0) { - return ret; - } - first = false; - } - - cow_set_bits(bitmap, bitnum, sector_bits); - - ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; - } - } - - return 0; -} - -static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVCowState *s = bs->opaque; - int ret, n; - - while (nb_sectors > 0) { - ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n); - if (ret < 0) { - return ret; - } - if (ret) { - ret = bdrv_pread(bs->file, - s->cow_sectors_offset + sector_num * 512, - buf, n * 512); - if (ret < 0) { - return ret; - } - } else { - if (bs->backing_hd) { - /* read from the base image */ - ret = bdrv_read(bs->backing_hd, sector_num, buf, n); - if (ret < 0) { - return ret; - } - } else { - memset(buf, 0, n * 512); - } - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - return 0; -} - -static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - int ret; - BDRVCowState *s = bs->opaque; - qemu_co_mutex_lock(&s->lock); - ret = cow_read(bs, sector_num, buf, nb_sectors); - qemu_co_mutex_unlock(&s->lock); - return ret; -} - -static int cow_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVCowState *s = bs->opaque; - int ret; - - ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512, - buf, nb_sectors * 512); - if (ret < 0) { - return ret; - } - - return cow_update_bitmap(bs, sector_num, nb_sectors); -} - -static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - int ret; - BDRVCowState *s = bs->opaque; - qemu_co_mutex_lock(&s->lock); - ret = cow_write(bs, sector_num, buf, nb_sectors); - qemu_co_mutex_unlock(&s->lock); - return ret; -} - -static void cow_close(BlockDriverState *bs) -{ -} - -static int cow_create(const char *filename, QemuOpts *opts, Error **errp) -{ - struct cow_header_v2 cow_header; - struct stat st; - int64_t image_sectors = 0; - char *image_filename = NULL; - Error *local_err = NULL; - int ret; - BlockDriverState *cow_bs = NULL; - - /* Read out options */ - image_sectors = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), - BDRV_SECTOR_SIZE); - image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); - - ret = bdrv_create_file(filename, opts, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - goto exit; - } - - ret = bdrv_open(&cow_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - goto exit; - } - - memset(&cow_header, 0, sizeof(cow_header)); - cow_header.magic = cpu_to_be32(COW_MAGIC); - cow_header.version = cpu_to_be32(COW_VERSION); - if (image_filename) { - /* Note: if no file, we put a dummy mtime */ - cow_header.mtime = cpu_to_be32(0); - - if (stat(image_filename, &st) != 0) { - goto mtime_fail; - } - cow_header.mtime = cpu_to_be32(st.st_mtime); - mtime_fail: - pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file), - image_filename); - } - cow_header.sectorsize = cpu_to_be32(512); - cow_header.size = cpu_to_be64(image_sectors * 512); - ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header)); - if (ret < 0) { - goto exit; - } - - /* resize to include at least all the bitmap */ - ret = bdrv_truncate(cow_bs, - sizeof(cow_header) + ((image_sectors + 7) >> 3)); - if (ret < 0) { - goto exit; - } - -exit: - g_free(image_filename); - if (cow_bs) { - bdrv_unref(cow_bs); - } - return ret; -} - -static QemuOptsList cow_create_opts = { - .name = "cow-create-opts", - .head = QTAILQ_HEAD_INITIALIZER(cow_create_opts.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Virtual disk size" - }, - { - .name = BLOCK_OPT_BACKING_FILE, - .type = QEMU_OPT_STRING, - .help = "File name of a base image" - }, - { /* end of list */ } - } -}; - -static BlockDriver bdrv_cow = { - .format_name = "cow", - .instance_size = sizeof(BDRVCowState), - - .bdrv_probe = cow_probe, - .bdrv_open = cow_open, - .bdrv_close = cow_close, - .bdrv_create = cow_create, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .supports_backing = true, - - .bdrv_read = cow_co_read, - .bdrv_write = cow_co_write, - .bdrv_co_get_block_status = cow_co_get_block_status, - - .create_opts = &cow_create_opts, -}; - -static void bdrv_cow_init(void) -{ - bdrv_register(&bdrv_cow); -} - -block_init(bdrv_cow_init); diff --git a/qapi/block-core.json b/qapi/block-core.json index 85dc87d6c7..fa2d1b7528 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -197,7 +197,7 @@ # 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', # 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow', # 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat' -# 2.2: 'archipelago' +# 2.2: 'archipelago' added, 'cow' dropped # # @backing_file: #optional the name of the backing file (for copy-on-write) # @@ -1148,7 +1148,7 @@ # Since: 2.0 ## { 'enum': 'BlockdevDriver', - 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'cow', + 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', 'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx', @@ -1565,7 +1565,6 @@ 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', - 'cow': 'BlockdevOptionsGenericCOWFormat', 'dmg': 'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', 'ftp': 'BlockdevOptionsFile', diff --git a/qemu-doc.texi b/qemu-doc.texi index ef3be72ae2..9973090c6c 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -654,15 +654,6 @@ File name of a base image (see @option{create} subcommand) If this option is set to @code{on}, the image is encrypted. @end table -@item cow -User Mode Linux Copy On Write image format. It is supported only for -compatibility with previous versions. -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand) -@end table - @item vdi VirtualBox 1.1 compatible image format. Supported options: diff --git a/qemu-img.texi b/qemu-img.texi index 50d2cca7a0..e3ef4a00e9 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -228,8 +228,8 @@ compression is read-only. It means that if a compressed sector is rewritten, then it is rewritten as uncompressed data. Image conversion is also useful to get smaller image when using a -growable format such as @code{qcow} or @code{cow}: the empty sectors -are detected and suppressed from the destination image. +growable format such as @code{qcow}: the empty sectors are detected and +suppressed from the destination image. @var{sparse_size} indicates the consecutive number of bytes (defaults to 4k) that must contain only zeros for qemu-img to create a sparse image during diff --git a/qmp-commands.hx b/qmp-commands.hx index 7658d4bd24..76656cc074 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2081,7 +2081,7 @@ Each json-object contain the following: - "file": device file name (json-string) - "ro": true if read-only, false otherwise (json-bool) - "drv": driver format name (json-string) - - Possible values: "blkdebug", "bochs", "cloop", "cow", "dmg", + - Possible values: "blkdebug", "bochs", "cloop", "dmg", "file", "file", "ftp", "ftps", "host_cdrom", "host_device", "host_floppy", "http", "https", "nbd", "parallels", "qcow", "qcow2", "raw", diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py index b7002905e1..0a8743ef41 100755 --- a/tests/image-fuzzer/runner.py +++ b/tests/image-fuzzer/runner.py @@ -150,8 +150,7 @@ class TestEnv(object): 'discard $off $len'], ['qemu-io', '$test_img', '-c', 'truncate $off']] - for fmt in ['raw', 'vmdk', 'vdi', 'cow', 'qcow2', 'file', - 'qed', 'vpc']: + for fmt in ['raw', 'vmdk', 'vdi', 'qcow2', 'file', 'qed', 'vpc']: self.commands.append( ['qemu-img', 'convert', '-f', 'qcow2', '-O', fmt, '$test_img', 'converted_image.' + fmt]) @@ -178,7 +177,7 @@ class TestEnv(object): by 'qemu-img create'. """ # All formats supported by the 'qemu-img create' command. - backing_file_fmt = random.choice(['raw', 'vmdk', 'vdi', 'cow', 'qcow2', + backing_file_fmt = random.choice(['raw', 'vmdk', 'vdi', 'qcow2', 'file', 'qed', 'vpc']) backing_file_name = 'backing_img.' + backing_file_fmt backing_file_size = random.randint(MIN_BACKING_FILE_SIZE, diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069 index e661598c4a..ce9e0541b2 100755 --- a/tests/qemu-iotests/069 +++ b/tests/qemu-iotests/069 @@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.rc . ./common.filter -_supported_fmt cow qed qcow qcow2 vmdk +_supported_fmt qed qcow qcow2 vmdk _supported_proto file _supported_os Linux _unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" diff --git a/tests/qemu-iotests/072 b/tests/qemu-iotests/072 index 58faa8b5a7..e4a723d733 100755 --- a/tests/qemu-iotests/072 +++ b/tests/qemu-iotests/072 @@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.rc . ./common.filter -_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow cow +_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow _supported_proto file _supported_os Linux diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099 index a26d3d243e..ffc7ea795a 100755 --- a/tests/qemu-iotests/099 +++ b/tests/qemu-iotests/099 @@ -41,7 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 # Basically all formats, but "raw" has issues with _filter_imgfmt regarding the # raw comparison image for blkverify; also, all images have to support creation -_supported_fmt cow qcow qcow2 qed vdi vhdx vmdk vpc +_supported_fmt qcow qcow2 qed vdi vhdx vmdk vpc _supported_proto file _supported_os Linux diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 70df659d5b..89c6dde263 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -136,7 +136,6 @@ common options check options -raw test raw (default) -bochs test bochs - -cow test cow -cloop test cloop -parallels test parallels -qcow test qcow @@ -182,11 +181,6 @@ testlist options xpand=false ;; - -cow) - IMGFMT=cow - xpand=false - ;; - -cloop) IMGFMT=cloop IMGFMT_GENERIC=false diff --git a/ui/cocoa.m b/ui/cocoa.m index 3147178118..d37c29b4a4 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -857,7 +857,7 @@ QemuCocoaView *cocoaView; [op setPrompt:@"Boot image"]; [op setMessage:@"Select the disk image you want to boot.\n\nHit the \"Cancel\" button to quit"]; NSArray *filetypes = [NSArray arrayWithObjects:@"img", @"iso", @"dmg", - @"qcow", @"qcow2", @"cow", @"cloop", @"vmdk", nil]; + @"qcow", @"qcow2", @"cloop", @"vmdk", nil]; #if (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) [op setAllowedFileTypes:filetypes]; [op beginSheetModalForWindow:normalWindow From e91a8b2fef92e7d31290e785a35f85a503fa1356 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 16 Sep 2014 15:12:06 -0400 Subject: [PATCH 55/59] block: vhdx - fix reading beyond pointer during image creation In vhdx_create_metadata(), we allocate 40 bytes to entry_buffer for the various metadata table entries. However, we write out 64kB from that buffer into the new file. Only write out the correct 40 bytes. Signed-off-by: Jeff Cody Reviewed-by: Markus Armbruster Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index d9aa2c14ed..22a4fb832c 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1408,6 +1408,12 @@ exit: return ret; } +#define VHDX_METADATA_ENTRY_BUFFER_SIZE \ + (sizeof(VHDXFileParameters) +\ + sizeof(VHDXVirtualDiskSize) +\ + sizeof(VHDXPage83Data) +\ + sizeof(VHDXVirtualDiskLogicalSectorSize) +\ + sizeof(VHDXVirtualDiskPhysicalSectorSize)) /* * Create the Metadata entries. @@ -1446,11 +1452,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs, VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size; VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size; - entry_buffer = g_malloc0(sizeof(VHDXFileParameters) + - sizeof(VHDXVirtualDiskSize) + - sizeof(VHDXPage83Data) + - sizeof(VHDXVirtualDiskLogicalSectorSize) + - sizeof(VHDXVirtualDiskPhysicalSectorSize)); + entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE); mt_file_params = entry_buffer; offset += sizeof(VHDXFileParameters); @@ -1531,7 +1533,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs, } ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer, - VHDX_HEADER_BLOCK_SIZE); + VHDX_METADATA_ENTRY_BUFFER_SIZE); if (ret < 0) { goto exit; } @@ -1726,7 +1728,6 @@ static int vhdx_create_new_region_table(BlockDriverState *bs, goto exit; } - exit: g_free(s); g_free(buffer); @@ -1877,7 +1878,6 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } - delete_and_exit: bdrv_unref(bs); exit: From 2f78e491d7b46542158ce0b8132ee4e05bc0ade4 Mon Sep 17 00:00:00 2001 From: Chrysostomos Nanakos Date: Thu, 18 Sep 2014 14:30:49 +0300 Subject: [PATCH 56/59] async: aio_context_new(): Handle event_notifier_init failure On a system with a low limit of open files the initialization of the event notifier could fail and QEMU exits without printing any error information to the user. The problem can be easily reproduced by enforcing a low limit of open files and start QEMU with enough I/O threads to hit this limit. The same problem raises, without the creation of I/O threads, while QEMU initializes the main event loop by enforcing an even lower limit of open files. This commit adds an error message on failure: # qemu [...] -object iothread,id=iothread0 -object iothread,id=iothread1 qemu: Failed to initialize event notifier: Too many open files in system Signed-off-by: Chrysostomos Nanakos Signed-off-by: Stefan Hajnoczi --- async.c | 16 +++++++++++----- include/block/aio.h | 2 +- include/qemu/main-loop.h | 2 +- iothread.c | 11 ++++++++++- main-loop.c | 9 +++++++-- qemu-img.c | 8 +++++++- qemu-io.c | 7 ++++++- qemu-nbd.c | 6 +++++- tests/test-aio.c | 10 +++++++++- tests/test-thread-pool.c | 10 +++++++++- tests/test-throttle.c | 10 +++++++++- vl.c | 5 +++-- 12 files changed, 78 insertions(+), 18 deletions(-) diff --git a/async.c b/async.c index a99e7f639a..6e1b282aa7 100644 --- a/async.c +++ b/async.c @@ -289,18 +289,24 @@ static void aio_rfifolock_cb(void *opaque) aio_notify(opaque); } -AioContext *aio_context_new(void) +AioContext *aio_context_new(Error **errp) { + int ret; AioContext *ctx; ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); + ret = event_notifier_init(&ctx->notifier, false); + if (ret < 0) { + g_source_destroy(&ctx->source); + error_setg_errno(errp, -ret, "Failed to initialize event notifier"); + return NULL; + } + aio_set_event_notifier(ctx, &ctx->notifier, + (EventNotifierHandler *) + event_notifier_test_and_clear); ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); - event_notifier_init(&ctx->notifier, false); - aio_set_event_notifier(ctx, &ctx->notifier, - (EventNotifierHandler *) - event_notifier_test_and_clear); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); return ctx; diff --git a/include/block/aio.h b/include/block/aio.h index 67a75ddd56..156272177f 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -102,7 +102,7 @@ void aio_set_dispatching(AioContext *ctx, bool dispatching); * They also provide bottom halves, a service to execute a piece of code * as soon as possible. */ -AioContext *aio_context_new(void); +AioContext *aio_context_new(Error **errp); /** * aio_context_ref: diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 6f0200a7ac..62c68c0f32 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -42,7 +42,7 @@ * * In the case of QEMU tools, this will also start/initialize timers. */ -int qemu_init_main_loop(void); +int qemu_init_main_loop(Error **errp); /** * main_loop_wait: Run one iteration of the main loop. diff --git a/iothread.c b/iothread.c index d9403cf69e..342a23fcb0 100644 --- a/iothread.c +++ b/iothread.c @@ -17,6 +17,7 @@ #include "block/aio.h" #include "sysemu/iothread.h" #include "qmp-commands.h" +#include "qemu/error-report.h" #define IOTHREADS_PATH "/objects" @@ -53,6 +54,9 @@ static void iothread_instance_finalize(Object *obj) { IOThread *iothread = IOTHREAD(obj); + if (!iothread->ctx) { + return; + } iothread->stopping = true; aio_notify(iothread->ctx); qemu_thread_join(&iothread->thread); @@ -63,11 +67,16 @@ static void iothread_instance_finalize(Object *obj) static void iothread_complete(UserCreatable *obj, Error **errp) { + Error *local_error = NULL; IOThread *iothread = IOTHREAD(obj); iothread->stopping = false; - iothread->ctx = aio_context_new(); iothread->thread_id = -1; + iothread->ctx = aio_context_new(&local_error); + if (!iothread->ctx) { + error_propagate(errp, local_error); + return; + } qemu_mutex_init(&iothread->init_done_lock); qemu_cond_init(&iothread->init_done_cond); diff --git a/main-loop.c b/main-loop.c index 3cc79f82f9..53393a4b18 100644 --- a/main-loop.c +++ b/main-loop.c @@ -126,10 +126,11 @@ void qemu_notify_event(void) static GArray *gpollfds; -int qemu_init_main_loop(void) +int qemu_init_main_loop(Error **errp) { int ret; GSource *src; + Error *local_error = NULL; init_clocks(); @@ -138,8 +139,12 @@ int qemu_init_main_loop(void) return ret; } + qemu_aio_context = aio_context_new(&local_error); + if (!qemu_aio_context) { + error_propagate(errp, local_error); + return -EMFILE; + } gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); - qemu_aio_context = aio_context_new(); src = aio_get_g_source(qemu_aio_context); g_source_attach(src, NULL); g_source_unref(src); diff --git a/qemu-img.c b/qemu-img.c index 91d1ac3d7d..dbf0904dc0 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2879,6 +2879,7 @@ int main(int argc, char **argv) { const img_cmd_t *cmd; const char *cmdname; + Error *local_error = NULL; int c; static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, @@ -2893,7 +2894,12 @@ int main(int argc, char **argv) error_set_progname(argv[0]); qemu_init_exec_dir(argv[0]); - qemu_init_main_loop(); + if (qemu_init_main_loop(&local_error)) { + error_report("%s", error_get_pretty(local_error)); + error_free(local_error); + exit(EXIT_FAILURE); + } + bdrv_init(); if (argc < 2) { error_exit("Not enough arguments"); diff --git a/qemu-io.c b/qemu-io.c index d2ab6946e8..66cf3ef4be 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -379,6 +379,7 @@ int main(int argc, char **argv) int c; int opt_index = 0; int flags = BDRV_O_UNMAP; + Error *local_error = NULL; #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); @@ -444,7 +445,11 @@ int main(int argc, char **argv) exit(1); } - qemu_init_main_loop(); + if (qemu_init_main_loop(&local_error)) { + error_report("%s", error_get_pretty(local_error)); + error_free(local_error); + exit(1); + } bdrv_init(); /* initialize commands */ diff --git a/qemu-nbd.c b/qemu-nbd.c index 9bc152e6c3..de9963f8fb 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -674,7 +674,11 @@ int main(int argc, char **argv) snprintf(sockpath, 128, SOCKET_PATH, basename(device)); } - qemu_init_main_loop(); + if (qemu_init_main_loop(&local_err)) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); + exit(EXIT_FAILURE); + } bdrv_init(); atexit(bdrv_close_all); diff --git a/tests/test-aio.c b/tests/test-aio.c index c6a8713e83..a7cb5c9915 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -14,6 +14,7 @@ #include "block/aio.h" #include "qemu/timer.h" #include "qemu/sockets.h" +#include "qemu/error-report.h" static AioContext *ctx; @@ -810,11 +811,18 @@ static void test_source_timer_schedule(void) int main(int argc, char **argv) { + Error *local_error = NULL; GSource *src; init_clocks(); - ctx = aio_context_new(); + ctx = aio_context_new(&local_error); + if (!ctx) { + error_report("Failed to create AIO Context: '%s'", + error_get_pretty(local_error)); + error_free(local_error); + exit(1); + } src = aio_get_g_source(ctx); g_source_attach(src, NULL); g_source_unref(src); diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c index ed2b25b8eb..8c4d68b345 100644 --- a/tests/test-thread-pool.c +++ b/tests/test-thread-pool.c @@ -4,6 +4,7 @@ #include "block/thread-pool.h" #include "block/block.h" #include "qemu/timer.h" +#include "qemu/error-report.h" static AioContext *ctx; static ThreadPool *pool; @@ -222,10 +223,17 @@ static void test_cancel_async(void) int main(int argc, char **argv) { int ret; + Error *local_error = NULL; init_clocks(); - ctx = aio_context_new(); + ctx = aio_context_new(&local_error); + if (!ctx) { + error_report("Failed to create AIO Context: '%s'", + error_get_pretty(local_error)); + error_free(local_error); + exit(1); + } pool = aio_get_thread_pool(ctx); g_test_init(&argc, &argv, NULL); diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 000ae31af9..d8ba415e43 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -14,6 +14,7 @@ #include #include "block/aio.h" #include "qemu/throttle.h" +#include "qemu/error-report.h" static AioContext *ctx; static LeakyBucket bkt; @@ -492,10 +493,17 @@ static void test_accounting(void) int main(int argc, char **argv) { GSource *src; + Error *local_error = NULL; init_clocks(); - ctx = aio_context_new(); + ctx = aio_context_new(&local_error); + if (!ctx) { + error_report("Failed to create AIO Context: '%s'", + error_get_pretty(local_error)); + error_free(local_error); + exit(1); + } src = aio_get_g_source(ctx); g_source_attach(src, NULL); g_source_unref(src); diff --git a/vl.c b/vl.c index dc792fe08b..6d073c3576 100644 --- a/vl.c +++ b/vl.c @@ -2968,6 +2968,7 @@ int main(int argc, char **argv, char **envp) ram_addr_t maxram_size = default_ram_size; uint64_t ram_slots = 0; FILE *vmstate_dump_file = NULL; + Error *main_loop_err = NULL; atexit(qemu_run_exit_notifiers); error_set_progname(argv[0]); @@ -3998,8 +3999,8 @@ int main(int argc, char **argv, char **envp) os_daemonize(); - if (qemu_init_main_loop()) { - fprintf(stderr, "qemu_init_main_loop failed\n"); + if (qemu_init_main_loop(&main_loop_err)) { + error_report("%s", error_get_pretty(main_loop_err)); exit(1); } From d9612b43dd062309a6a4ad4e5cef2badc94f5385 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 10 Sep 2014 14:17:49 +0800 Subject: [PATCH 57/59] virtio: Import virtio_vring.h This header has no further dependencies. It only has some stable data types and primitive functions, so we can copy it to include/hw/virtio in order to allow vring code (and its user virtio-blk dataplane) to be built unconditionally, even for cross compiling. Suggested-by: Paolo Bonzini Signed-off-by: Fam Zheng Reviewed-by: Paolo Bonzini Message-id: 1410329871-28885-2-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- hw/net/vhost_net.c | 2 +- include/hw/virtio/dataplane/vring.h | 2 +- include/hw/virtio/virtio_ring.h | 167 ++++++++++++++++++++++++++++ linux-headers/linux/vhost.h | 2 +- 4 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 include/hw/virtio/virtio_ring.h diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 77bb93e4d7..4e3a061622 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include +#include "hw/virtio/virtio_ring.h" #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-bus.h" diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h index af73ee2ae3..d3e086aefc 100644 --- a/include/hw/virtio/dataplane/vring.h +++ b/include/hw/virtio/dataplane/vring.h @@ -17,8 +17,8 @@ #ifndef VRING_H #define VRING_H -#include #include "qemu-common.h" +#include "hw/virtio/virtio_ring.h" #include "hw/virtio/virtio.h" typedef struct { diff --git a/include/hw/virtio/virtio_ring.h b/include/hw/virtio/virtio_ring.h new file mode 100644 index 0000000000..8f58bc975e --- /dev/null +++ b/include/hw/virtio/virtio_ring.h @@ -0,0 +1,167 @@ +#ifndef _LINUX_VIRTIO_RING_H +#define _LINUX_VIRTIO_RING_H +/* + * This file is copied from /usr/include/linux while converting __uNN types + * to uXX_t, __inline__ to inline, and tab to spaces. + * */ + +/* An interface for efficient virtio implementation, currently for use by KVM + * and lguest, but hopefully others soon. Do NOT change this since it will + * break existing servers and clients. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright Rusty Russell IBM Corporation 2007. */ + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + uint64_t addr; + /* Length. */ + uint32_t len; + /* The flags as indicated above. */ + uint16_t flags; + /* We chain unused descriptors via this, too */ + uint16_t next; +}; + +struct vring_avail { + uint16_t flags; + uint16_t idx; + uint16_t ring[]; +}; + +/* u32 is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + uint32_t id; + /* Total length of the descriptor chain which was used (written to) */ + uint32_t len; +}; + +struct vring_used { + uint16_t flags; + uint16_t idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + + struct vring_desc *desc; + + struct vring_avail *avail; + + struct vring_used *used; +}; + +/* The standard layout for the ring is a continuous chunk of memory which looks + * like this. We assume num is a power of 2. + * + * struct vring + * { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * uint16_t avail_flags; + * uint16_t avail_idx; + * uint16_t available[num]; + * uint16_t used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * uint16_t used_flags; + * uint16_t used_idx; + * struct vring_used_elem used[num]; + * uint16_t avail_event_idx; + * }; + */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num]) + +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = p + num*sizeof(struct vring_desc); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(uint16_t) + + align-1) & ~(align - 1)); +} + +static inline unsigned vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(uint16_t) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(uint16_t) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} + +#endif /* _LINUX_VIRTIO_RING_H */ diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c656f61cfc..bb5df43874 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -14,7 +14,7 @@ #include #include -#include +#include "hw/virtio/virtio_ring.h" struct vhost_vring_state { unsigned int index; From 032f8b8158eaab94573340c883cb6cd03cea1117 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 10 Sep 2014 14:17:50 +0800 Subject: [PATCH 58/59] vring: Better error handling if num is too large To be more consistent inside this function. Signed-off-by: Fam Zheng Reviewed-by: Paolo Bonzini Message-id: 1410329871-28885-3-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- hw/virtio/dataplane/vring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 67cb2b823e..372706a234 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -181,7 +181,8 @@ static int get_desc(Vring *vring, VirtQueueElement *elem, /* Stop for now if there are not enough iovecs available. */ if (*num >= VIRTQUEUE_MAX_SIZE) { - return -ENOBUFS; + error_report("Invalid SG num: %u", *num); + return -EFAULT; } /* TODO handle non-contiguous memory across region boundaries */ From 52b53c04faab9f7a9879c8dc014930649a3e698d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 10 Sep 2014 14:17:51 +0800 Subject: [PATCH 59/59] block: Always compile virtio-blk dataplane Dataplane doesn't depend on linux-aio any more, so we don't need the compiling condition now. Configure options are kept but just print a message. Signed-off-by: Fam Zheng Reviewed-by: Paolo Bonzini Message-id: 1410329871-28885-4-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- configure | 21 ++------------------- hw/block/Makefile.objs | 2 +- hw/block/virtio-blk.c | 20 ++------------------ hw/virtio/Makefile.objs | 2 +- include/hw/virtio/virtio-blk.h | 2 -- 5 files changed, 6 insertions(+), 41 deletions(-) diff --git a/configure b/configure index 6c3d6cde14..4b35be432b 100755 --- a/configure +++ b/configure @@ -327,7 +327,6 @@ glusterfs="" glusterfs_discard="no" glusterfs_zerofill="no" archipelago="" -virtio_blk_data_plane="" gtk="" gtkabi="" vte="" @@ -1093,9 +1092,8 @@ for opt do ;; --enable-archipelago) archipelago="yes" ;; - --disable-virtio-blk-data-plane) virtio_blk_data_plane="no" - ;; - --enable-virtio-blk-data-plane) virtio_blk_data_plane="yes" + --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) + echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; --disable-gtk) gtk="no" ;; @@ -2943,16 +2941,6 @@ else tpm_passthrough=no fi -########################################## -# adjust virtio-blk-data-plane based on linux-aio - -if test "$virtio_blk_data_plane" = "yes" -a \ - "$linux_aio" != "yes" ; then - error_exit "virtio-blk-data-plane requires Linux AIO, please try --enable-linux-aio" -elif test -z "$virtio_blk_data_plane" ; then - virtio_blk_data_plane=$linux_aio -fi - ########################################## # attr probe @@ -4327,7 +4315,6 @@ echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" echo "GlusterFS support $glusterfs" echo "Archipelago support $archipelago" -echo "virtio-blk-data-plane $virtio_blk_data_plane" echo "gcov $gcov_tool" echo "gcov enabled $gcov" echo "TPM support $tpm" @@ -4789,10 +4776,6 @@ if test "$quorum" = "yes" ; then echo "CONFIG_QUORUM=y" >> $config_host_mak fi -if test "$virtio_blk_data_plane" = "yes" ; then - echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak -fi - if test "$vhdx" = "yes" ; then echo "CONFIG_VHDX=y" >> $config_host_mak fi diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index bf46f03b7e..d4c3ab758d 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -12,4 +12,4 @@ common-obj-$(CONFIG_NVME_PCI) += nvme.o obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO) += virtio-blk.o -obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ +obj-$(CONFIG_VIRTIO) += dataplane/ diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 38ad38f49f..45e0c8f6e9 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -18,10 +18,8 @@ #include "hw/block/block.h" #include "sysemu/blockdev.h" #include "hw/virtio/virtio-blk.h" -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE -# include "dataplane/virtio-blk.h" -# include "migration/migration.h" -#endif +#include "dataplane/virtio-blk.h" +#include "migration/migration.h" #include "block/scsi.h" #ifdef __linux__ # include @@ -435,7 +433,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) .num_writes = 0, }; -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * dataplane here instead of waiting for .set_status(). */ @@ -443,7 +440,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_data_plane_start(s->dataplane); return; } -#endif while ((req = virtio_blk_get_request(s))) { virtio_blk_handle_request(req, &mrb); @@ -500,11 +496,9 @@ static void virtio_blk_reset(VirtIODevice *vdev) { VirtIOBlock *s = VIRTIO_BLK(vdev); -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE if (s->dataplane) { virtio_blk_data_plane_stop(s->dataplane); } -#endif /* * This should cancel pending requests, but can't do nicely until there @@ -594,12 +588,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) VirtIOBlock *s = VIRTIO_BLK(vdev); uint32_t features; -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { virtio_blk_data_plane_stop(s->dataplane); } -#endif if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { return; @@ -694,7 +686,6 @@ static const BlockDevOps virtio_block_ops = { .resize_cb = virtio_blk_resize, }; -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE /* Disable dataplane thread during live migration since it does not * update the dirty memory bitmap yet. */ @@ -725,7 +716,6 @@ static void virtio_blk_migration_state_changed(Notifier *notifier, void *data) } } } -#endif /* CONFIG_VIRTIO_BLK_DATA_PLANE */ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) { @@ -762,7 +752,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output); s->complete_request = virtio_blk_complete_request; -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err); if (err != NULL) { error_propagate(errp, err); @@ -771,7 +760,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } s->migration_state_notifier.notify = virtio_blk_migration_state_changed; add_migration_state_change_notifier(&s->migration_state_notifier); -#endif s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, @@ -789,11 +777,9 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBlock *s = VIRTIO_BLK(dev); -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE remove_migration_state_change_notifier(&s->migration_state_notifier); virtio_blk_data_plane_destroy(s->dataplane); s->dataplane = NULL; -#endif qemu_del_vm_change_state_handler(s->change); unregister_savevm(dev, "virtio-blk", s); blockdev_mark_auto_del(s->bs); @@ -818,9 +804,7 @@ static Property virtio_blk_properties[] = { #ifdef __linux__ DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true), #endif -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false), -#endif DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index ec9e855bc1..d21c397756 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -2,7 +2,7 @@ common-obj-y += virtio-rng.o common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o common-obj-y += virtio-bus.o common-obj-y += virtio-mmio.o -common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/ +common-obj-$(CONFIG_VIRTIO) += dataplane/ obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index cf61154658..f3853f2b75 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -132,10 +132,8 @@ typedef struct VirtIOBlock { VMChangeStateEntry *change; /* Function to push to vq and notify guest */ void (*complete_request)(struct VirtIOBlockReq *req, unsigned char status); -#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE Notifier migration_state_notifier; struct VirtIOBlockDataPlane *dataplane; -#endif } VirtIOBlock; typedef struct MultiReqBuffer {