Block layer patches:

- Fix crashes and hangs related to iothreads, bdrv_drain and block jobs:
     - Fix some AIO context locking in jobs
     - Fix blk->in_flight during blk_wait_while_drained()
 - vpc: Don't round up already aligned BAT sizes
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJejI1UAAoJEH8JsnLIjy/WQhEQAIF2zkkdbT77VTMLvFmPU36J
 hPR2RSzd5egNtfn3zm7vGZJ3FEZb5SEAV5R9CnwVAJeQnPUll7bjkPsoynrEUFU2
 PSoFe8lKuYwoAOOSjqakASpKx/Xs3sctKc4fgRWr5/dbWFCC77Q+qxHiKO4KYfeh
 g00jsjeCLMVLop3r9uMDovlA81mJUIP5axSjSH7akgzLC+ZCfH2RFFu62D7wYv9w
 UgQM/NZt2YuLFm+BeQLB4K2BK+PZBCN1trPj+h11ecUwm9b1XZZfO/7R0T8Wrljc
 49fd5Z/GombCnyxuLCr6QrhLZcr8FffLJXQgkdHTkWUKQXqZUfmqLjVIAbli0ZiC
 hP8jE5EgdAXpBrGeM2oH+dk0iQSBGTIMaGlhDwxO32xOAQ8TKpRiMZMfwGHqB+vn
 m/EPoHLHL6vQGxISfGjj4k3fnSP74nvRrS656MhLuG03SkPZacyTZQpTkErg2imW
 AeU6g4afvvLtJBoF/YYA5Qhff1Ux5eh9jactIW1DRf/Q4tTc4ioTU25560Le4eGZ
 kex/AwIcf9P47eTUCP8L6iNKz8RU7bV4g9vl9zz7fQm3i9GEhly88XvOppnXRUvT
 XdkfdlSmUZ9vue6rAfgsL5fQIHtsGRfH90nT11/IW1X4baOImtcQBWg3xZdR4zPS
 W2H0J01PlSKE8l/OWQlo
 =oODf
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches:

- Fix crashes and hangs related to iothreads, bdrv_drain and block jobs:
    - Fix some AIO context locking in jobs
    - Fix blk->in_flight during blk_wait_while_drained()
- vpc: Don't round up already aligned BAT sizes

# gpg: Signature made Tue 07 Apr 2020 15:25:24 BST
# gpg:                using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  vpc: Don't round up already aligned BAT sizes
  block: Fix blk->in_flight during blk_wait_while_drained()
  block: Increase BB.in_flight for coroutine and sync interfaces
  block-backend: Reorder flush/pdiscard function definitions
  backup: don't acquire aio_context in backup_clean
  replication: assert we own context before job_cancel_sync
  job: take each job's lock individually in job_txn_apply

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-04-07 19:12:45 +01:00
commit 2f37b0222c
9 changed files with 193 additions and 95 deletions

View file

@ -126,11 +126,7 @@ static void backup_abort(Job *job)
static void backup_clean(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
AioContext *aio_context = bdrv_get_aio_context(s->backup_top);
aio_context_acquire(aio_context);
bdrv_backup_top_drop(s->backup_top);
aio_context_release(aio_context);
}
void backup_do_checkpoint(BlockJob *job, Error **errp)

View file

@ -1140,16 +1140,22 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return 0;
}
/*
 * Wait on blk->queued_requests while @blk has a non-zero quiesce_counter and
 * request queuing has not been disabled.
 *
 * To be called between exactly one pair of blk_inc/dec_in_flight(): that
 * single in-flight reference is dropped for the duration of the wait and
 * taken again once the coroutine continues.
 */
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
{
    assert(blk->in_flight > 0);

    /* Nothing to wait for: not quiesced, or queuing is disabled. */
    if (!blk->quiesce_counter || blk->disable_request_queuing) {
        return;
    }

    blk_dec_in_flight(blk);
    qemu_co_queue_wait(&blk->queued_requests, NULL);
    blk_inc_in_flight(blk);
}
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
BlockDriverState *bs;
@ -1178,10 +1184,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
return ret;
}
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
unsigned int bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
/*
 * Public coroutine wrapper around blk_do_preadv().
 *
 * Takes one in-flight reference on @blk for the duration of the call and
 * returns whatever blk_do_preadv() returned.
 */
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
int ret;
/* blk_do_preadv() must run inside exactly one inc/dec_in_flight pair */
blk_inc_in_flight(blk);
ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
blk_dec_in_flight(blk);
return ret;
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
{
int ret;
BlockDriverState *bs;
@ -1214,6 +1234,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
return ret;
}
/*
 * Public coroutine wrapper around blk_do_pwritev_part().
 *
 * Takes one in-flight reference on @blk for the duration of the call,
 * forwards all arguments unchanged and returns whatever
 * blk_do_pwritev_part() returned.
 */
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
unsigned int bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
{
int ret;
/* blk_do_pwritev_part() must run inside exactly one inc/dec_in_flight pair */
blk_inc_in_flight(blk);
ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
blk_dec_in_flight(blk);
return ret;
}
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
@ -1234,7 +1268,7 @@ static void blk_read_entry(void *opaque)
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
qiov, rwco->flags);
aio_wait_kick();
}
@ -1244,8 +1278,8 @@ static void blk_write_entry(void *opaque)
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
qiov, rwco->flags);
rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
qiov, 0, rwco->flags);
aio_wait_kick();
}
@ -1262,6 +1296,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
blk_inc_in_flight(blk);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
@ -1270,6 +1305,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
bdrv_coroutine_enter(blk_bs(blk), co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
blk_dec_in_flight(blk);
return rwco.ret;
}
@ -1387,14 +1423,8 @@ static void blk_aio_read_entry(void *opaque)
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
if (rwco->blk->quiesce_counter) {
blk_dec_in_flight(rwco->blk);
blk_wait_while_drained(rwco->blk);
blk_inc_in_flight(rwco->blk);
}
assert(qiov->size == acb->bytes);
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
qiov, rwco->flags);
blk_aio_complete(acb);
}
@ -1405,15 +1435,9 @@ static void blk_aio_write_entry(void *opaque)
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
if (rwco->blk->quiesce_counter) {
blk_dec_in_flight(rwco->blk);
blk_wait_while_drained(rwco->blk);
blk_inc_in_flight(rwco->blk);
}
assert(!qiov || qiov->size == acb->bytes);
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
qiov, rwco->flags);
rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
qiov, 0, rwco->flags);
blk_aio_complete(acb);
}
@ -1488,38 +1512,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
blk_aio_write_entry, flags, cb, opaque);
}
static void blk_aio_flush_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_flush(rwco->blk);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}
static void blk_aio_pdiscard_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
{
bdrv_aio_cancel(acb);
@ -1530,7 +1522,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
bdrv_aio_cancel_async(acb);
}
int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
blk_wait_while_drained(blk);
@ -1546,8 +1540,7 @@ static void blk_ioctl_entry(void *opaque)
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
qiov->iov[0].iov_base);
rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
aio_wait_kick();
}
@ -1561,7 +1554,7 @@ static void blk_aio_ioctl_entry(void *opaque)
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
blk_aio_complete(acb);
}
@ -1572,7 +1565,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
int ret;
@ -1586,7 +1581,50 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
return bdrv_co_pdiscard(blk->root, offset, bytes);
}
int blk_co_flush(BlockBackend *blk)
/*
 * Entry function that blk_aio_pdiscard() hands to blk_aio_prwv().
 *
 * @opaque is the BlkAioEmAIOCB of the request. The result of
 * blk_do_pdiscard() is stored in its rwco.ret before blk_aio_complete()
 * is called on it.
 *
 * NOTE(review): blk_do_pdiscard() is documented as needing exactly one
 * blk_inc/dec_in_flight() pair around it and none is taken here, so the
 * reference is presumably held by the AIO submission/completion path.
 * Confirm against blk_aio_prwv()/blk_aio_complete().
 */
static void blk_aio_pdiscard_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
/* offset comes from the rwco, the byte count from the AIOCB itself */
rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
blk_aio_complete(acb);
}
/*
 * Submit a discard of @bytes bytes at @offset through blk_aio_prwv().
 *
 * blk_aio_pdiscard_entry is passed as the entry function, with a NULL
 * buffer and 0 in the flags position. @cb and @opaque are forwarded
 * unchanged. Returns the BlockAIOCB produced by blk_aio_prwv().
 */
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
{
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
/*
 * Public coroutine wrapper around blk_do_pdiscard().
 *
 * Takes one in-flight reference on @blk for the duration of the call and
 * returns whatever blk_do_pdiscard() returned.
 */
int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
int ret;
/* blk_do_pdiscard() must run inside exactly one inc/dec_in_flight pair */
blk_inc_in_flight(blk);
ret = blk_do_pdiscard(blk, offset, bytes);
blk_dec_in_flight(blk);
return ret;
}
/*
 * Entry function that blk_pdiscard() passes to blk_prw().
 *
 * @opaque is the BlkRwCo of the request. Runs blk_do_pdiscard(), stores
 * the result in rwco->ret and then calls aio_wait_kick().
 *
 * No in-flight reference is taken here: blk_prw() calls
 * blk_inc_in_flight()/blk_dec_in_flight() around the entry function.
 */
static void blk_pdiscard_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
/*
 * The byte count is read from the qiov's size.
 * NOTE(review): presumably blk_prw() builds this qiov over the requested
 * length - confirm against blk_prw().
 */
rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
aio_wait_kick();
}
/*
 * Discard @bytes bytes at @offset by delegating to blk_prw().
 *
 * A NULL buffer is passed, blk_pdiscard_entry is the entry function and
 * the last argument is 0 (presumably the request flags - confirm against
 * blk_prw()). Returns blk_prw()'s result.
 */
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn blk_do_flush(BlockBackend *blk)
{
blk_wait_while_drained(blk);
@ -1597,10 +1635,36 @@ int blk_co_flush(BlockBackend *blk)
return bdrv_co_flush(blk_bs(blk));
}
/*
 * Entry function that blk_aio_flush() hands to blk_aio_prwv().
 *
 * @opaque is the BlkAioEmAIOCB of the request. The result of
 * blk_do_flush() is stored in its rwco.ret before blk_aio_complete() is
 * called on it.
 *
 * NOTE(review): blk_do_flush() is documented as needing exactly one
 * blk_inc/dec_in_flight() pair around it and none is taken here, so the
 * reference is presumably held by the AIO submission/completion path.
 * Confirm against blk_aio_prwv()/blk_aio_complete().
 */
static void blk_aio_flush_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_do_flush(rwco->blk);
blk_aio_complete(acb);
}
/*
 * Submit a flush of @blk through blk_aio_prwv().
 *
 * blk_aio_flush_entry is passed as the entry function, with offset 0,
 * byte count 0, a NULL buffer and 0 in the flags position. @cb and
 * @opaque are forwarded unchanged. Returns the BlockAIOCB produced by
 * blk_aio_prwv().
 */
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}
/*
 * Public coroutine wrapper around blk_do_flush().
 *
 * Takes one in-flight reference on @blk for the duration of the call and
 * returns whatever blk_do_flush() returned.
 */
int coroutine_fn blk_co_flush(BlockBackend *blk)
{
int ret;
/* blk_do_flush() must run inside exactly one inc/dec_in_flight pair */
blk_inc_in_flight(blk);
ret = blk_do_flush(blk);
blk_dec_in_flight(blk);
return ret;
}
static void blk_flush_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
rwco->ret = blk_co_flush(rwco->blk);
rwco->ret = blk_do_flush(rwco->blk);
aio_wait_kick();
}
@ -2083,20 +2147,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
}
static void blk_pdiscard_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
aio_wait_kick();
}
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{

View file

@ -144,12 +144,15 @@ fail:
static void replication_close(BlockDriverState *bs)
{
BDRVReplicationState *s = bs->opaque;
Job *commit_job;
if (s->stage == BLOCK_REPLICATION_RUNNING) {
replication_stop(s->rs, false, NULL);
}
if (s->stage == BLOCK_REPLICATION_FAILOVER) {
job_cancel_sync(&s->commit_job->job);
commit_job = &s->commit_job->job;
assert(commit_job->aio_context == qemu_get_current_aio_context());
job_cancel_sync(commit_job);
}
if (s->mode == REPLICATION_MODE_SECONDARY) {

View file

@ -835,7 +835,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
/* Write the footer (twice: at the beginning and at the end) */
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {

View file

@ -3612,7 +3612,16 @@ void qmp_block_job_finalize(const char *id, Error **errp)
}
trace_qmp_block_job_finalize(job);
job_ref(&job->job);
job_finalize(&job->job, errp);
/*
* Job's context might have changed via job_finalize (and job_txn_apply
* automatically acquires the new one), so make sure we release the correct
* one.
*/
aio_context = blk_get_aio_context(job->blk);
job_unref(&job->job);
aio_context_release(aio_context);
}

View file

@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque);
void blk_aio_cancel(BlockAIOCB *acb);
void blk_aio_cancel_async(BlockAIOCB *acb);
int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);

View file

@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp)
}
trace_qmp_job_finalize(job);
job_ref(job);
job_finalize(job, errp);
/*
* Job's context might have changed via job_finalize (and job_txn_apply
* automatically acquires the new one), so make sure we release the correct
* one.
*/
aio_context = job->aio_context;
job_unref(job);
aio_context_release(aio_context);
}

50
job.c
View file

@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job)
}
}
static int job_txn_apply(JobTxn *txn, int fn(Job *))
static int job_txn_apply(Job *job, int fn(Job *))
{
Job *job, *next;
AioContext *inner_ctx;
Job *other_job, *next;
JobTxn *txn = job->txn;
int rc = 0;
QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
rc = fn(job);
/*
 * Similar to job_completed_txn_abort, we take each job's lock before
 * applying fn, but since we assume that job->aio_context is held by the
 * caller, we need to release it here to avoid holding the lock twice - which
 * would break AIO_WAIT_WHILE from within fn.
 */
job_ref(job);
aio_context_release(job->aio_context);
QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
inner_ctx = other_job->aio_context;
aio_context_acquire(inner_ctx);
rc = fn(other_job);
aio_context_release(inner_ctx);
if (rc) {
break;
}
}
/*
* Note that job->aio_context might have been changed by calling fn, so we
* can't use a local variable to cache it.
*/
aio_context_acquire(job->aio_context);
job_unref(job);
return rc;
}
@ -774,11 +795,11 @@ static void job_do_finalize(Job *job)
assert(job && job->txn);
/* prepare the transaction to complete */
rc = job_txn_apply(job->txn, job_prepare);
rc = job_txn_apply(job, job_prepare);
if (rc) {
job_completed_txn_abort(job);
} else {
job_txn_apply(job->txn, job_finalize_single);
job_txn_apply(job, job_finalize_single);
}
}
@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job)
assert(other_job->ret == 0);
}
job_txn_apply(txn, job_transition_to_pending);
job_txn_apply(job, job_transition_to_pending);
/* If no jobs need manual finalization, automatically do so */
if (job_txn_apply(txn, job_needs_finalize) == 0) {
if (job_txn_apply(job, job_needs_finalize) == 0) {
job_do_finalize(job);
}
}
@ -849,9 +870,10 @@ static void job_completed(Job *job)
static void job_exit(void *opaque)
{
Job *job = (Job *)opaque;
AioContext *ctx = job->aio_context;
AioContext *ctx;
aio_context_acquire(ctx);
job_ref(job);
aio_context_acquire(job->aio_context);
/* This is a lie, we're not quiescent, but still doing the completion
* callbacks. However, completion callbacks tend to involve operations that
@ -862,6 +884,14 @@ static void job_exit(void *opaque)
job_completed(job);
/*
* Note that calling job_completed can move the job to a different
* aio_context, so we cannot cache from above. job_txn_apply takes care of
* acquiring the new lock, and we ref/unref to avoid job_completed freeing
* the job underneath us.
*/
ctx = job->aio_context;
job_unref(job);
aio_context_release(ctx);
}

View file

@ -367,7 +367,9 @@ static void test_cancel_concluded(void)
aio_poll(qemu_get_aio_context(), true);
assert(job->status == JOB_STATUS_PENDING);
aio_context_acquire(job->aio_context);
job_finalize(job, &error_abort);
aio_context_release(job->aio_context);
assert(job->status == JOB_STATUS_CONCLUDED);
cancel_common(s);