Update version for v6.1.1 release

Signed-off-by: Michael Roth <michael.roth@amd.com>
tests: tcg: Fix PVH test with binutils 2.36+
2021-12-23 09:52:09 -06:00 · 2021-12-15 07:13:37 -06:00 · 2021-12-15 07:13:27 -06:00 · 2021-12-15 07:13:18 -06:00 · 2021-12-15 07:13:12 -06:00 · 2021-12-14 17:40:06 -06:00
2457 changed files with 54462 additions and 133503 deletions
--- a/.github/lockdown.yml
+++ b/.github/lockdown.yml
@ -0,0 +1,34 @@
+# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown
+
+# Close issues and pull requests
+close: true
+
+# Lock issues and pull requests
+lock: true
+
+issues:
+  comment: |
+    Thank you for your interest in the QEMU project.
+
+    This repository is a read-only mirror of the project's repositories hosted
+    at https://gitlab.com/qemu-project/qemu.git.
+    The project does not process issues filed on GitHub.
+
+    The project issues are tracked on GitLab:
+    https://gitlab.com/qemu-project/qemu/-/issues
+
+    QEMU welcomes bug report contributions. You can file new ones on:
+    https://gitlab.com/qemu-project/qemu/-/issues/new
+
+pulls:
+  comment: |
+    Thank you for your interest in the QEMU project.
+
+    This repository is a read-only mirror of the project's repositories hosted
+    on https://gitlab.com/qemu-project/qemu.git.
+    The project does not process merge requests filed on GitHub.
+
+    QEMU welcomes contributions of code (either fixing bugs or adding new
+    functionality). However, we get a lot of patches, and so we have some
+    guidelines about contributing on the project website:
+    https://www.qemu.org/contribute/
--- a/.github/workflows/lockdown.yml
+++ b/.github/workflows/lockdown.yml
@ -1,30 +0,0 @@
-# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown
-
-name: 'Repo Lockdown'
-
-on:
-  pull_request_target:
-    types: opened
-
-permissions:
-  pull-requests: write
-
-jobs:
-  action:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: dessant/repo-lockdown@v2
-        with:
-          pull-comment: |
-            Thank you for your interest in the QEMU project.
-
-            This repository is a read-only mirror of the project's repositories hosted
-            on https://gitlab.com/qemu-project/qemu.git.
-            The project does not process merge requests filed on GitHub.
-
-            QEMU welcomes contributions of code (either fixing bugs or adding new
-            functionality). However, we get a lot of patches, and so we have some
-            guidelines about contributing on the project website:
-            https://www.qemu.org/contribute/
-          lock-pull: true
-          close-pull: true
--- a/.gitlab-ci.d/buildtest-template.yml
+++ b/.gitlab-ci.d/buildtest-template.yml
@ -37,7 +37,7 @@
    # Avoid recompiling by hiding ninja with NINJA=":"
    - make NINJA=":" $MAKE_CHECK_ARGS

-.avocado_test_job_template:
+.acceptance_test_job_template:
  extends: .native_test_job_template
  cache:
    key: "${CI_JOB_NAME}-cache"
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@ -24,16 +24,16 @@ check-system-alpine:
      artifacts: true
  variables:
    IMAGE: alpine
-    MAKE_CHECK_ARGS: check-unit check-qtest
+    MAKE_CHECK_ARGS: check

-avocado-system-alpine:
-  extends: .avocado_test_job_template
+acceptance-system-alpine:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-system-alpine
      artifacts: true
  variables:
    IMAGE: alpine
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance

 build-system-ubuntu:
  extends: .native_build_job_template
@ -59,14 +59,14 @@ check-system-ubuntu:
    IMAGE: ubuntu2004
    MAKE_CHECK_ARGS: check

-avocado-system-ubuntu:
-  extends: .avocado_test_job_template
+acceptance-system-ubuntu:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-system-ubuntu
      artifacts: true
  variables:
    IMAGE: ubuntu2004
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance

 build-system-debian:
  extends: .native_build_job_template
@ -74,6 +74,7 @@ build-system-debian:
    job: amd64-debian-container
  variables:
    IMAGE: debian-amd64
+    CONFIGURE_ARGS: --enable-fdt=system
    TARGETS: arm-softmmu avr-softmmu i386-softmmu mipsel-softmmu
      riscv64-softmmu sh4eb-softmmu sparc-softmmu xtensaeb-softmmu
    MAKE_CHECK_ARGS: check-build
@ -91,25 +92,14 @@ check-system-debian:
    IMAGE: debian-amd64
    MAKE_CHECK_ARGS: check

-avocado-system-debian:
-  extends: .avocado_test_job_template
+acceptance-system-debian:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-system-debian
      artifacts: true
  variables:
    IMAGE: debian-amd64
-    MAKE_CHECK_ARGS: check-avocado
-
-crash-test-debian:
-  extends: .native_test_job_template
-  needs:
-    - job: build-system-debian
-      artifacts: true
-  variables:
-    IMAGE: debian-amd64
-  script:
-    - cd build
-    - scripts/device-crash-test -q ./qemu-system-i386
+    MAKE_CHECK_ARGS: check-acceptance

 build-system-fedora:
  extends: .native_build_job_template
@ -136,26 +126,14 @@ check-system-fedora:
    IMAGE: fedora
    MAKE_CHECK_ARGS: check

-avocado-system-fedora:
-  extends: .avocado_test_job_template
+acceptance-system-fedora:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-system-fedora
      artifacts: true
  variables:
    IMAGE: fedora
-    MAKE_CHECK_ARGS: check-avocado
-
-crash-test-fedora:
-  extends: .native_test_job_template
-  needs:
-    - job: build-system-fedora
-      artifacts: true
-  variables:
-    IMAGE: fedora
-  script:
-    - cd build
-    - scripts/device-crash-test -q ./qemu-system-ppc
-    - scripts/device-crash-test -q ./qemu-system-riscv32
+    MAKE_CHECK_ARGS: check-acceptance

 build-system-centos:
  extends: .native_build_job_template
@ -164,7 +142,7 @@ build-system-centos:
  variables:
    IMAGE: centos8
    CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system
-      --enable-modules --enable-trace-backends=dtrace --enable-docs
+                    --enable-modules --enable-trace-backends=dtrace
    TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu
      x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu
    MAKE_CHECK_ARGS: check-build
@ -182,14 +160,14 @@ check-system-centos:
    IMAGE: centos8
    MAKE_CHECK_ARGS: check

-avocado-system-centos:
-  extends: .avocado_test_job_template
+acceptance-system-centos:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-system-centos
      artifacts: true
  variables:
    IMAGE: centos8
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance

 build-system-opensuse:
  extends: .native_build_job_template
@ -214,16 +192,95 @@ check-system-opensuse:
    IMAGE: opensuse-leap
    MAKE_CHECK_ARGS: check

-avocado-system-opensuse:
-  extends: .avocado_test_job_template
+acceptance-system-opensuse:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-system-opensuse
      artifacts: true
  variables:
    IMAGE: opensuse-leap
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance


+build-disabled:
+  extends: .native_build_job_template
+  needs:
+    job: amd64-fedora-container
+  variables:
+    IMAGE: fedora
+    CONFIGURE_ARGS:
+      --disable-attr
+      --disable-auth-pam
+      --disable-avx2
+      --disable-bochs
+      --disable-brlapi
+      --disable-bzip2
+      --disable-cap-ng
+      --disable-capstone
+      --disable-cloop
+      --disable-coroutine-pool
+      --disable-curl
+      --disable-curses
+      --disable-dmg
+      --disable-docs
+      --disable-gcrypt
+      --disable-glusterfs
+      --disable-gnutls
+      --disable-gtk
+      --disable-guest-agent
+      --disable-iconv
+      --disable-keyring
+      --disable-kvm
+      --disable-libiscsi
+      --disable-libpmem
+      --disable-libssh
+      --disable-libudev
+      --disable-libusb
+      --disable-libxml2
+      --disable-linux-aio
+      --disable-live-block-migration
+      --disable-lzo
+      --disable-malloc-trim
+      --disable-mpath
+      --disable-nettle
+      --disable-numa
+      --disable-opengl
+      --disable-parallels
+      --disable-pie
+      --disable-qcow1
+      --disable-qed
+      --disable-qom-cast-debug
+      --disable-rbd
+      --disable-rdma
+      --disable-replication
+      --disable-sdl
+      --disable-seccomp
+      --disable-slirp
+      --disable-smartcard
+      --disable-snappy
+      --disable-sparse
+      --disable-spice
+      --disable-strip
+      --disable-tpm
+      --disable-usb-redir
+      --disable-vdi
+      --disable-vhost-crypto
+      --disable-vhost-net
+      --disable-vhost-scsi
+      --disable-vhost-kernel
+      --disable-vhost-user
+      --disable-vhost-vdpa
+      --disable-vhost-vsock
+      --disable-virglrenderer
+      --disable-vnc
+      --disable-vte
+      --disable-vvfat
+      --disable-xen
+      --disable-zstd
+    TARGETS: arm-softmmu i386-softmmu ppc64-softmmu mips64-softmmu
+      s390x-softmmu i386-linux-user
+    MAKE_CHECK_ARGS: check-qtest SPEED=slow
+
 # This job explicitly disables TCG (--disable-tcg); KVM is detected by
 # the configure script. The container doesn't contain Xen headers so
 # Xen accelerator is not detected / selected. As a result it builds the
@ -248,11 +305,11 @@ build-tcg-disabled:
    - cd tests/qemu-iotests/
    - ./check -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 048
            052 063 077 086 101 104 106 113 148 150 151 152 157 159 160 163
-            170 171 183 184 192 194 208 221 226 227 236 253 277 image-fleecing
+            170 171 183 184 192 194 208 221 222 226 227 236 253 277
    - ./check -qcow2 028 051 056 057 058 065 068 082 085 091 095 096 102 122
            124 132 139 142 144 145 151 152 155 157 165 194 196 200 202
-            208 209 216 218 227 234 246 247 248 250 254 255 257 258
-            260 261 262 263 264 270 272 273 277 279 image-fleecing
+            208 209 216 218 222 227 234 246 247 248 250 254 255 257 258
+            260 261 262 263 264 270 272 273 277 279

 build-user:
  extends: .native_build_job_template
@ -340,7 +397,7 @@ clang-user:
 # This can be accomplished by using --enable-slirp=git, which avoids the use of
 # a system-wide version of the library
 #
-# Split in three sets of build/check/avocado to limit the execution time of each
+# Split in three sets of build/check/acceptance to limit the execution time of each
 # job
 build-cfi-aarch64:
  extends: .native_build_job_template
@ -375,14 +432,14 @@ check-cfi-aarch64:
    IMAGE: fedora
    MAKE_CHECK_ARGS: check

-avocado-cfi-aarch64:
-  extends: .avocado_test_job_template
+acceptance-cfi-aarch64:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-cfi-aarch64
      artifacts: true
  variables:
    IMAGE: fedora
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance

 build-cfi-ppc64-s390x:
  extends: .native_build_job_template
@ -417,14 +474,14 @@ check-cfi-ppc64-s390x:
    IMAGE: fedora
    MAKE_CHECK_ARGS: check

-avocado-cfi-ppc64-s390x:
-  extends: .avocado_test_job_template
+acceptance-cfi-ppc64-s390x:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-cfi-ppc64-s390x
      artifacts: true
  variables:
    IMAGE: fedora
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance

 build-cfi-x86_64:
  extends: .native_build_job_template
@ -453,14 +510,14 @@ check-cfi-x86_64:
    IMAGE: fedora
    MAKE_CHECK_ARGS: check

-avocado-cfi-x86_64:
-  extends: .avocado_test_job_template
+acceptance-cfi-x86_64:
+  extends: .acceptance_test_job_template
  needs:
    - job: build-cfi-x86_64
      artifacts: true
  variables:
    IMAGE: fedora
-    MAKE_CHECK_ARGS: check-avocado
+    MAKE_CHECK_ARGS: check-acceptance

 tsan-build:
  extends: .native_build_job_template
@ -592,25 +649,20 @@ build-without-default-devices:
 build-without-default-features:
  extends: .native_build_job_template
  needs:
-    job: amd64-fedora-container
+    job: amd64-debian-container
  variables:
-    IMAGE: fedora
-    CONFIGURE_ARGS:
-      --without-default-features
-      --disable-capstone
-      --disable-pie
-      --disable-qom-cast-debug
-      --disable-slirp
-      --disable-strip
-    TARGETS: avr-softmmu i386-softmmu mips64-softmmu s390x-softmmu sh4-softmmu
-      sparc64-softmmu hexagon-linux-user i386-linux-user s390x-linux-user
-    MAKE_CHECK_ARGS: check-unit check-qtest SPEED=slow
+    IMAGE: debian-amd64
+    CONFIGURE_ARGS: --without-default-features --disable-user
+        --target-list-exclude=arm-softmmu,i386-softmmu,mipsel-softmmu,mips64-softmmu,ppc-softmmu
+    MAKE_CHECK_ARGS: check-unit

 build-libvhost-user:
  stage: build
  image: $CI_REGISTRY_IMAGE/qemu/fedora:latest
  needs:
    job: amd64-fedora-container
+  before_script:
+    - dnf install -y meson ninja-build
  script:
    - mkdir subprojects/libvhost-user/build
    - cd subprojects/libvhost-user/build
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@ -14,7 +14,6 @@
  stage: build
  image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master
  needs: []
-  timeout: 80m
  allow_failure: true
  script:
    - source .gitlab-ci.d/cirrus/$NAME.vars
@ -36,14 +35,11 @@
          -e "s|[@]PIP3@|$PIP3|g"
          -e "s|[@]PYPI_PKGS@|$PYPI_PKGS|g"
          -e "s|[@]CONFIGURE_ARGS@|$CONFIGURE_ARGS|g"
-          -e "s|[@]TEST_TARGETS@|$TEST_TARGETS|g"
+          -e "s|[@]TEST_TARGETSS@|$TEST_TARGETSS|g"
      <.gitlab-ci.d/cirrus/build.yml >.gitlab-ci.d/cirrus/$NAME.yml
    - cat .gitlab-ci.d/cirrus/$NAME.yml
    - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml
  rules:
-    # Allow on 'staging' branch and 'stable-X.Y-staging' branches only
-    - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/'
-      when: never
    - if: "$CIRRUS_GITHUB_REPO && $CIRRUS_API_TOKEN"

 x64-freebsd-12-build:
@ -52,11 +48,14 @@ x64-freebsd-12-build:
    NAME: freebsd-12
    CIRRUS_VM_INSTANCE_TYPE: freebsd_instance
    CIRRUS_VM_IMAGE_SELECTOR: image_family
-    CIRRUS_VM_IMAGE_NAME: freebsd-12-3
+    CIRRUS_VM_IMAGE_NAME: freebsd-12-2
    CIRRUS_VM_CPUS: 8
    CIRRUS_VM_RAM: 8G
    UPDATE_COMMAND: pkg update
    INSTALL_COMMAND: pkg install -y
+    # TODO: Enable gnutls again once FreeBSD's libtasn1 got fixed
+    # See: https://gitlab.com/gnutls/libtasn1/-/merge_requests/71
+    CONFIGURE_ARGS: --disable-gnutls
    TEST_TARGETS: check

 x64-freebsd-13-build:
@ -86,38 +85,3 @@ x64-macos-11-base-build:
    PATH_EXTRA: /usr/local/opt/ccache/libexec:/usr/local/opt/gettext/bin
    PKG_CONFIG_PATH: /usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig
    TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat check-qtest-x86_64
-
-
-# The following jobs run VM-based tests via KVM on a Linux-based Cirrus-CI job
-.cirrus_kvm_job:
-  stage: build
-  image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master
-  needs: []
-  timeout: 80m
-  allow_failure: true
-  script:
-    - sed -e "s|[@]CI_REPOSITORY_URL@|$CI_REPOSITORY_URL|g"
-          -e "s|[@]CI_COMMIT_REF_NAME@|$CI_COMMIT_REF_NAME|g"
-          -e "s|[@]CI_COMMIT_SHA@|$CI_COMMIT_SHA|g"
-          -e "s|[@]NAME@|$NAME|g"
-          -e "s|[@]CONFIGURE_ARGS@|$CONFIGURE_ARGS|g"
-          -e "s|[@]TEST_TARGETS@|$TEST_TARGETS|g"
-      <.gitlab-ci.d/cirrus/kvm-build.yml >.gitlab-ci.d/cirrus/$NAME.yml
-    - cat .gitlab-ci.d/cirrus/$NAME.yml
-    - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml
-  rules:
-    - when: manual
-
-x86-netbsd:
-  extends: .cirrus_kvm_job
-  variables:
-    NAME: netbsd
-    CONFIGURE_ARGS: --target-list=x86_64-softmmu,ppc64-softmmu,aarch64-softmmu
-    TEST_TARGETS: check
-
-x86-openbsd:
-  extends: .cirrus_kvm_job
-  variables:
-    NAME: openbsd
-    CONFIGURE_ARGS: --target-list=i386-softmmu,riscv64-softmmu,mips64-softmmu
-    TEST_TARGETS: check
--- a/.gitlab-ci.d/cirrus/build.yml
+++ b/.gitlab-ci.d/cirrus/build.yml
@ -13,7 +13,6 @@ env:
  PYTHON: "@PYTHON@"
  MAKE: "@MAKE@"
  CONFIGURE_ARGS: "@CONFIGURE_ARGS@"
-  TEST_TARGETS: "@TEST_TARGETS@"

 build_task:
  install_script:
--- a/.gitlab-ci.d/cirrus/freebsd-12.vars
+++ b/.gitlab-ci.d/cirrus/freebsd-12.vars
@ -2,15 +2,12 @@
 #
 #  $ lcitool variables freebsd-12 qemu
 #
-# https://gitlab.com/libvirt/libvirt-ci
+# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1

+PACKAGING_COMMAND='pkg'
 CCACHE='/usr/local/bin/ccache'
-CPAN_PKGS=''
-CROSS_PKGS=''
 MAKE='/usr/local/bin/gmake'
 NINJA='/usr/local/bin/ninja'
-PACKAGING_COMMAND='pkg'
-PIP3='/usr/local/bin/pip-3.8'
-PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils dtc gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd'
-PYPI_PKGS=''
 PYTHON='/usr/local/bin/python3'
+PIP3='/usr/local/bin/pip-3.8'
+PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd'
--- a/.gitlab-ci.d/cirrus/freebsd-13.vars
+++ b/.gitlab-ci.d/cirrus/freebsd-13.vars
@ -2,15 +2,12 @@
 #
 #  $ lcitool variables freebsd-13 qemu
 #
-# https://gitlab.com/libvirt/libvirt-ci
+# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1

+PACKAGING_COMMAND='pkg'
 CCACHE='/usr/local/bin/ccache'
-CPAN_PKGS=''
-CROSS_PKGS=''
 MAKE='/usr/local/bin/gmake'
 NINJA='/usr/local/bin/ninja'
-PACKAGING_COMMAND='pkg'
-PIP3='/usr/local/bin/pip-3.8'
-PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils dtc gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd'
-PYPI_PKGS=''
 PYTHON='/usr/local/bin/python3'
+PIP3='/usr/local/bin/pip-3.8'
+PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd'
--- a/.gitlab-ci.d/cirrus/kvm-build.yml
+++ b/.gitlab-ci.d/cirrus/kvm-build.yml
@ -1,31 +0,0 @@
-container:
-  image: fedora:35
-  cpu: 4
-  memory: 8Gb
-  kvm: true
-
-env:
-  CIRRUS_CLONE_DEPTH: 1
-  CI_REPOSITORY_URL: "@CI_REPOSITORY_URL@"
-  CI_COMMIT_REF_NAME: "@CI_COMMIT_REF_NAME@"
-  CI_COMMIT_SHA: "@CI_COMMIT_SHA@"
-
-@NAME@_task:
-  @NAME@_vm_cache:
-    folder: $HOME/.cache/qemu-vm
-  install_script:
-    - dnf update -y
-    - dnf install -y git make openssh-clients qemu-img qemu-system-x86 wget
-  clone_script:
-    - git clone --depth 100 "$CI_REPOSITORY_URL" .
-    - git fetch origin "$CI_COMMIT_REF_NAME"
-    - git reset --hard "$CI_COMMIT_SHA"
-  build_script:
-    - if [ -f $HOME/.cache/qemu-vm/images/@NAME@.img ]; then
-        make vm-build-@NAME@ J=$(getconf _NPROCESSORS_ONLN)
-          EXTRA_CONFIGURE_OPTS="@CONFIGURE_ARGS@"
-          BUILD_TARGET="@TEST_TARGETS@" ;
-      else
-        make vm-build-@NAME@ J=$(getconf _NPROCESSORS_ONLN) BUILD_TARGET=help
-          EXTRA_CONFIGURE_OPTS="--disable-system --disable-user --disable-tools" ;
-      fi
--- a/.gitlab-ci.d/cirrus/macos-11.vars
+++ b/.gitlab-ci.d/cirrus/macos-11.vars
@ -2,15 +2,14 @@
 #
 #  $ lcitool variables macos-11 qemu
 #
-# https://gitlab.com/libvirt/libvirt-ci
+# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1

+PACKAGING_COMMAND='brew'
 CCACHE='/usr/local/bin/ccache'
-CPAN_PKGS='Test::Harness'
-CROSS_PKGS=''
 MAKE='/usr/local/bin/gmake'
 NINJA='/usr/local/bin/ninja'
-PACKAGING_COMMAND='brew'
-PIP3='/usr/local/bin/pip3'
-PKGS='bash bc bzip2 capstone ccache cpanminus ctags curl dbus diffutils dtc gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb libxml2 llvm lzo make meson ncurses nettle ninja perl pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy sparse spice-protocol tesseract texinfo usbredir vde vte3 zlib zstd'
-PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme virtualenv'
 PYTHON='/usr/local/bin/python3'
+PIP3='/usr/local/bin/pip3'
+PKGS='bash bc bzip2 capstone ccache cpanminus ctags curl dbus diffutils gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb libxml2 llvm lzo make meson ncurses nettle ninja perl pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy sparse spice-protocol tesseract texinfo usbredir vde vte3 zlib zstd'
+PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme virtualenv'
+CPAN_PKGS='Test::Harness'
--- a/.gitlab-ci.d/container-cross.yml
+++ b/.gitlab-ci.d/container-cross.yml
@ -134,8 +134,7 @@ ppc64el-debian-cross-container:
 riscv64-debian-cross-container:
  extends: .container_job_template
  stage: containers-layer2
-  # as we are currently based on 'sid/unstable' we may break so...
-  allow_failure: true
+  needs: ['amd64-debian10-container']
  variables:
    NAME: debian-riscv64-cross

--- a/.gitlab-ci.d/containers.yml
+++ b/.gitlab-ci.d/containers.yml
@ -29,6 +29,11 @@ amd64-ubuntu2004-container:
  variables:
    NAME: ubuntu2004

+amd64-ubuntu-container:
+  extends: .container_job_template
+  variables:
+    NAME: ubuntu
+
 amd64-opensuse-leap-container:
  extends: .container_job_template
  variables:
--- a/.gitlab-ci.d/crossbuilds.yml
+++ b/.gitlab-ci.d/crossbuilds.yml
@ -124,25 +124,6 @@ cross-ppc64el-user:
  variables:
    IMAGE: debian-ppc64el-cross

-# The riscv64 cross-builds currently use a 'sid' container to get
-# compilers and libraries. Until something more stable is found we
-# allow_failure so as not to block CI.
-cross-riscv64-system:
-  extends: .cross_system_build_job
-  allow_failure: true
-  needs:
-    job: riscv64-debian-cross-container
-  variables:
-    IMAGE: debian-riscv64-cross
-
-cross-riscv64-user:
-  extends: .cross_user_build_job
-  allow_failure: true
-  needs:
-    job: riscv64-debian-cross-container
-  variables:
-    IMAGE: debian-riscv64-cross
-
 cross-s390x-system:
  extends: .cross_system_build_job
  needs:
--- a/.gitlab-ci.d/custom-runners.yml
+++ b/.gitlab-ci.d/custom-runners.yml
@ -13,7 +13,226 @@
 variables:
  GIT_STRATEGY: clone

-include:
-  - local: '/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml'
-  - local: '/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml'
-  - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml'
+# All ubuntu-18.04 jobs should run successfully in an environment
+# setup by the scripts/ci/setup/build-environment.yml task
+# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
+ubuntu-18.04-s390x-all-linux-static:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
+ # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+ - make --output-sync -j`nproc` check-tcg V=1
+
+ubuntu-18.04-s390x-all:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-18.04-s390x-alldbg:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --disable-libssh
+ - make clean
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-18.04-s390x-clang:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-18.04-s390x-tci:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --enable-tcg-interpreter
+ - make --output-sync -j`nproc`
+
+ubuntu-18.04-s390x-notcg:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --disable-tcg
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+# All ubuntu-20.04 jobs should run successfully in an environment
+# setup by the scripts/ci/setup/qemu/build-environment.yml task
+# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
+ubuntu-20.04-aarch64-all-linux-static:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
+ # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+ - make --output-sync -j`nproc` check-tcg V=1
+
+ubuntu-20.04-aarch64-all:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-20.04-aarch64-alldbg:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --disable-libssh
+ - make clean
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-20.04-aarch64-clang:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-20.04-aarch64-tci:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --enable-tcg-interpreter
+ - make --output-sync -j`nproc`
+
+ubuntu-20.04-aarch64-notcg:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --disable-tcg
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
--- a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
+++ b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
@ -1,28 +0,0 @@
-centos-stream-8-x86_64:
- allow_failure: true
- needs: []
- stage: build
- tags:
- - centos_stream_8
- - x86_64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE"
- artifacts:
-   name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
-   when: on_failure
-   expire_in: 7 days
-   paths:
-     - build/tests/results/latest/results.xml
-     - build/tests/results/latest/test-results
-   reports:
-     junit: build/tests/results/latest/results.xml
- before_script:
- - JOBS=$(expr $(nproc) + 1)
- script:
- - mkdir build
- - cd build
- - ../scripts/ci/org.centos/stream/8/x86_64/configure
- - make -j"$JOBS"
- - make NINJA=":" check
- - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado
--- a/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml
@ -1,118 +0,0 @@
-# All ubuntu-18.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/build-environment.yml task
-# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
-
-ubuntu-18.04-s390x-all-linux-static:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$S390X_RUNNER_AVAILABLE"
- script:
- # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
- # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
- - mkdir build
- - cd build
- - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
- - make --output-sync -j`nproc` check-tcg V=1
-
-ubuntu-18.04-s390x-all:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$S390X_RUNNER_AVAILABLE"
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-18.04-s390x-alldbg:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --enable-debug --disable-libssh
- - make clean
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-18.04-s390x-clang:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-18.04-s390x-tci:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --enable-tcg-interpreter
- - make --output-sync -j`nproc`
-
-ubuntu-18.04-s390x-notcg:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --disable-tcg
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
--- a/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml
@ -1,118 +0,0 @@
-# All ubuntu-20.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/qemu/build-environment.yml task
-# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
-
-ubuntu-20.04-aarch64-all-linux-static:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$AARCH64_RUNNER_AVAILABLE"
- script:
- # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
- # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
- - mkdir build
- - cd build
- - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
- - make --output-sync -j`nproc` check-tcg V=1
-
-ubuntu-20.04-aarch64-all:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-20.04-aarch64-alldbg:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$AARCH64_RUNNER_AVAILABLE"
- script:
- - mkdir build
- - cd build
- - ../configure --enable-debug --disable-libssh
- - make clean
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-20.04-aarch64-clang:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-20.04-aarch64-tci:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --enable-tcg-interpreter
- - make --output-sync -j`nproc`
-
-ubuntu-20.04-aarch64-notcg:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --disable-tcg
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
--- a/.gitlab-ci.d/edk2.yml
+++ b/.gitlab-ci.d/edk2.yml
@ -50,11 +50,7 @@ build-edk2:
   GIT_DEPTH: 3
 script: # Clone the required submodules and build EDK2
 - git submodule update --init roms/edk2
- - git -C roms/edk2 submodule update --init --
-     ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3
-     BaseTools/Source/C/BrotliCompress/brotli
-     CryptoPkg/Library/OpensslLib/openssl
-     MdeModulePkg/Library/BrotliCustomDecompressLib/brotli
+ - git -C roms/edk2 submodule update --init
 - export JOBS=$(($(getconf _NPROCESSORS_ONLN) + 1))
 - echo "=== Using ${JOBS} simultaneous jobs ==="
 - make -j${JOBS} -C roms efi 2>&1 1>edk2-stdout.log | tee -a edk2-stderr.log >&2
--- a/.gitlab-ci.d/edk2/Dockerfile
+++ b/.gitlab-ci.d/edk2/Dockerfile
@ -3,7 +3,7 @@
 #
 FROM ubuntu:16.04

-MAINTAINER Philippe Mathieu-Daudé <f4bug@amsat.org>
+MAINTAINER Philippe Mathieu-Daudé <philmd@redhat.com>

 # Install packages required to build EDK2
 RUN apt update \
--- a/.gitlab-ci.d/opensbi.yml
+++ b/.gitlab-ci.d/opensbi.yml
@ -43,7 +43,9 @@ build-opensbi:
 artifacts:
   paths: # 'artifacts.zip' will contains the following files:
   - pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
+   - pc-bios/opensbi-riscv32-generic-fw_dynamic.elf
   - pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
+   - pc-bios/opensbi-riscv64-generic-fw_dynamic.elf
   - opensbi32-generic-stdout.log
   - opensbi32-generic-stderr.log
   - opensbi64-generic-stdout.log
--- a/.gitlab-ci.d/qemu-project.yml
+++ b/.gitlab-ci.d/qemu-project.yml
@ -11,4 +11,3 @@ include:
  - local: '/.gitlab-ci.d/static_checks.yml'
  - local: '/.gitlab-ci.d/custom-runners.yml'
  - local: '/.gitlab-ci.d/cirrus.yml'
-  - local: '/.gitlab-ci.d/windows.yml'
--- a/.gitlab-ci.d/static_checks.yml
+++ b/.gitlab-ci.d/static_checks.yml
@ -8,7 +8,7 @@ check-patch:
  variables:
    GIT_DEPTH: 1000
  rules:
-    - if: '$CI_PROJECT_NAMESPACE == "qemu-project"'
+    - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    - when: on_success
      allow_failure: true
@ -46,6 +46,4 @@ check-python-tox:
    QEMU_TOX_EXTRA_ARGS: --skip-missing-interpreters=false
  needs:
    job: python-container
-  rules:
-    - when: manual
  allow_failure: true
--- a/.gitlab-ci.d/windows.yml
+++ b/.gitlab-ci.d/windows.yml
@ -1,98 +0,0 @@
-.shared_msys2_builder:
-  tags:
-  - shared-windows
-  - windows
-  - windows-1809
-  cache:
-    key: "${CI_JOB_NAME}-cache"
-    paths:
-      - ${CI_PROJECT_DIR}/msys64/var/cache
-  needs: []
-  stage: build
-  timeout: 70m
-  before_script:
-  - If ( !(Test-Path -Path msys64\var\cache ) ) {
-      mkdir msys64\var\cache
-    }
-  - If ( !(Test-Path -Path msys64\var\cache\msys2.exe ) ) {
-      Invoke-WebRequest
-      "https://github.com/msys2/msys2-installer/releases/download/2021-07-25/msys2-base-x86_64-20210725.sfx.exe"
-      -outfile "msys64\var\cache\msys2.exe"
-    }
-  - msys64\var\cache\msys2.exe -y
-  - ((Get-Content -path .\msys64\etc\\post-install\\07-pacman-key.post -Raw)
-      -replace '--refresh-keys', '--version') |
-     Set-Content -Path ${CI_PROJECT_DIR}\msys64\etc\\post-install\\07-pacman-key.post
-  - .\msys64\usr\bin\bash -lc "sed -i 's/^CheckSpace/#CheckSpace/g' /etc/pacman.conf"
-  - .\msys64\usr\bin\bash -lc 'pacman --noconfirm -Syuu'  # Core update
-  - .\msys64\usr\bin\bash -lc 'pacman --noconfirm -Syuu'  # Normal update
-  - taskkill /F /FI "MODULES eq msys-2.0.dll"
-
-msys2-64bit:
-  extends: .shared_msys2_builder
-  script:
-  - .\msys64\usr\bin\bash -lc "pacman -Sy --noconfirm --needed
-      diffutils git grep make sed
-      mingw-w64-x86_64-capstone
-      mingw-w64-x86_64-curl
-      mingw-w64-x86_64-cyrus-sasl
-      mingw-w64-x86_64-gcc
-      mingw-w64-x86_64-glib2
-      mingw-w64-x86_64-gnutls
-      mingw-w64-x86_64-libnfs
-      mingw-w64-x86_64-libpng
-      mingw-w64-x86_64-libssh
-      mingw-w64-x86_64-libtasn1
-      mingw-w64-x86_64-libusb
-      mingw-w64-x86_64-libxml2
-      mingw-w64-x86_64-nettle
-      mingw-w64-x86_64-ninja
-      mingw-w64-x86_64-pixman
-      mingw-w64-x86_64-pkgconf
-      mingw-w64-x86_64-python
-      mingw-w64-x86_64-SDL2
-      mingw-w64-x86_64-SDL2_image
-      mingw-w64-x86_64-snappy
-      mingw-w64-x86_64-usbredir
-      mingw-w64-x86_64-zstd "
-  - $env:CHERE_INVOKING = 'yes'  # Preserve the current working directory
-  - $env:MSYSTEM = 'MINGW64'     # Start a 64 bit Mingw environment
-  - .\msys64\usr\bin\bash -lc './configure --target-list=x86_64-softmmu
-      --enable-capstone=system --without-default-devices'
-  - .\msys64\usr\bin\bash -lc "sed -i '/^ROMS=/d' build/config-host.mak"
-  - .\msys64\usr\bin\bash -lc 'make -j2'
-  - .\msys64\usr\bin\bash -lc 'make check'
-
-msys2-32bit:
-  extends: .shared_msys2_builder
-  script:
-  - .\msys64\usr\bin\bash -lc "pacman -Sy --noconfirm --needed
-      diffutils git grep make sed
-      mingw-w64-i686-capstone
-      mingw-w64-i686-curl
-      mingw-w64-i686-cyrus-sasl
-      mingw-w64-i686-gcc
-      mingw-w64-i686-glib2
-      mingw-w64-i686-gnutls
-      mingw-w64-i686-gtk3
-      mingw-w64-i686-libgcrypt
-      mingw-w64-i686-libjpeg-turbo
-      mingw-w64-i686-libssh
-      mingw-w64-i686-libtasn1
-      mingw-w64-i686-libusb
-      mingw-w64-i686-libxml2
-      mingw-w64-i686-lzo2
-      mingw-w64-i686-ninja
-      mingw-w64-i686-pixman
-      mingw-w64-i686-pkgconf
-      mingw-w64-i686-python
-      mingw-w64-i686-snappy
-      mingw-w64-i686-usbredir "
-  - $env:CHERE_INVOKING = 'yes'  # Preserve the current working directory
-  - $env:MSYSTEM = 'MINGW32'     # Start a 32-bit MinG environment
-  - mkdir output
-  - cd output
-  - ..\msys64\usr\bin\bash -lc "../configure --target-list=ppc64-softmmu
-      --enable-capstone=system"
-  - ..\msys64\usr\bin\bash -lc 'make -j2'
-  - ..\msys64\usr\bin\bash -lc 'make check'
--- a/.gitmodules
+++ b/.gitmodules
@ -64,6 +64,3 @@
 [submodule "roms/vbootrom"]
 	path = roms/vbootrom
 	url = https://gitlab.com/qemu-project/vbootrom.git
-[submodule "tests/lcitool/libvirt-ci"]
-	path = tests/lcitool/libvirt-ci
-	url = http://gitlab.com/libvirt/libvirt-ci
--- a/.mailmap
+++ b/.mailmap
@ -50,7 +50,6 @@ Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> <arikalo@wavecomp.com>
 Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> <aleksandar.rikalo@rt-rk.com>
 Alexander Graf <agraf@csgraf.de> <agraf@suse.de>
 Anthony Liguori <anthony@codemonkey.ws> Anthony Liguori <aliguori@us.ibm.com>
-Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
 Filip Bozuta <filip.bozuta@syrmia.com> <filip.bozuta@rt-rk.com.com>
 Frederic Konrad <konrad@adacore.com> <fred.konrad@greensocs.com>
 Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
@ -63,7 +62,6 @@ Paul Burton <paulburton@kernel.org> <paul.burton@mips.com>
 Paul Burton <paulburton@kernel.org> <paul.burton@imgtec.com>
 Paul Burton <paulburton@kernel.org> <paul@archlinuxmips.org>
 Paul Burton <paulburton@kernel.org> <pburton@wavecomp.com>
-Philippe Mathieu-Daudé <f4bug@amsat.org> <philmd@redhat.com>
 Stefan Brankovic <stefan.brankovic@syrmia.com> <stefan.brankovic@rt-rk.com.com>
 Yongbok Kim <yongbok.kim@mips.com> <yongbok.kim@imgtec.com>

@ -71,7 +69,6 @@ Yongbok Kim <yongbok.kim@mips.com> <yongbok.kim@imgtec.com>
 # git author config, or had utf8/latin1 encoding issues.
 Aaron Lindsay <aaron@os.amperecomputing.com>
 Alexey Gerasimenko <x1917x@gmail.com>
-Alex Chen <alex.chen@huawei.com>
 Alex Ivanov <void@aleksoft.net>
 Andreas Färber <afaerber@suse.de>
 Bandan Das <bsd@redhat.com>
@ -102,11 +99,9 @@ Gautham R. Shenoy <ego@in.ibm.com>
 Gautham R. Shenoy <ego@linux.vnet.ibm.com>
 Gonglei (Arei) <arei.gonglei@huawei.com>
 Guang Wang <wang.guang55@zte.com.cn>
-Haibin Zhang <haibinzhang@tencent.com>
 Hailiang Zhang <zhang.zhanghailiang@huawei.com>
 Hanna Reitz <hreitz@redhat.com> <mreitz@redhat.com>
 Hervé Poussineau <hpoussin@reactos.org>
-Hyman Huang <huangy81@chinatelecom.cn>
 Jakub Jermář <jakub@jermar.eu>
 Jakub Jermář <jakub.jermar@kernkonzept.com>
 Jean-Christophe Dubois <jcd@tribudubois.net>
@ -140,7 +135,6 @@ Nicholas Thomas <nick@bytemark.co.uk>
 Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
 Orit Wasserman <owasserm@redhat.com>
 Paolo Bonzini <pbonzini@redhat.com>
-Pan Nengyuan <pannengyuan@huawei.com>
 Pavel Dovgaluk <dovgaluk@ispras.ru>
 Pavel Dovgaluk <pavel.dovgaluk@gmail.com>
 Pavel Dovgaluk <Pavel.Dovgaluk@ispras.ru>
--- a/.travis.yml
+++ b/.travis.yml
@ -305,3 +305,26 @@ jobs:
        - CONFIG="--disable-containers --disable-tcg --enable-kvm
                  --disable-tools --host-cc=clang --cxx=clang++"
        - UNRELIABLE=true
+
+    # Release builds
+    # The make-release script expects a QEMU version, so our tag must start with a 'v'.
+    # This is the case when release candidate tags are created.
+    - name: "Release tarball"
+      if: tag IS present AND tag =~ /^v\d+\.\d+(\.\d+)?(-\S*)?$/
+      env:
+        # We want to build from the release tarball
+        - BUILD_DIR="release/build/dir" SRC_DIR="../../.."
+        - BASE_CONFIG="--prefix=$PWD/dist"
+        - CONFIG="--target-list=x86_64-softmmu,aarch64-softmmu,armeb-linux-user,ppc-linux-user"
+        - TEST_CMD="make install -j${JOBS}"
+        - QEMU_VERSION="${TRAVIS_TAG:1}"
+        - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default"
+      script:
+        - make -C ${SRC_DIR} qemu-${QEMU_VERSION}.tar.bz2
+        - ls -l ${SRC_DIR}/qemu-${QEMU_VERSION}.tar.bz2
+        - tar -xf ${SRC_DIR}/qemu-${QEMU_VERSION}.tar.bz2 && cd qemu-${QEMU_VERSION}
+        - mkdir -p release-build && cd release-build
+        - ../configure ${BASE_CONFIG} ${CONFIG} || { cat config.log meson-logs/meson-log.txt && exit 1; }
+        - make install
+  allow_failures:
+    - env: UNRELIABLE=true
--- a/Kconfig.host
+++ b/Kconfig.host
@ -41,7 +41,3 @@ config PVRDMA
 config MULTIPROCESS_ALLOWED
    bool
    imply MULTIPROCESS
-
-config FUZZ
-    bool
-    select SPARSE_MEM
--- a/449
+++ b/449
--- a/29
+++ b/29
@ -87,7 +87,7 @@ x := $(shell rm -rf meson-private meson-info meson-logs)
 endif

 # 1. ensure config-host.mak is up-to-date
-config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/scripts/meson-buildoptions.sh $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION
+config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION
 	@echo config-host.mak is out-of-date, running configure
 	@if test -f meson-private/coredata.dat; then \
 	  ./config.status --skip-meson; \
@ -124,12 +124,6 @@ ifneq ($(MESON),)
 Makefile.mtest: build.ninja scripts/mtest2make.py
 	$(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@
 -include Makefile.mtest
-
-.PHONY: update-buildoptions
-all update-buildoptions: $(SRC_PATH)/scripts/meson-buildoptions.sh
-$(SRC_PATH)/scripts/meson-buildoptions.sh: $(SRC_PATH)/meson_options.txt
-	$(MESON) introspect --buildoptions $(SRC_PATH)/meson.build | $(PYTHON) \
-	  scripts/meson-buildoptions.py > $@.tmp && mv $@.tmp $@
 endif

 # 4. Rules to bridge to other makefiles
@ -145,8 +139,7 @@ NINJAFLAGS = $(if $V,-v) $(if $(MAKE.n), -n) $(if $(MAKE.k), -k0) \
        $(filter-out -j, $(lastword -j1 $(filter -l% -j%, $(MAKEFLAGS)))) \

 ninja-cmd-goals = $(or $(MAKECMDGOALS), all)
-ninja-cmd-goals += $(foreach t, $(.check.build-suites), $(.check-$t.deps))
-ninja-cmd-goals += $(foreach t, $(.bench.build-suites), $(.bench-$t.deps))
+ninja-cmd-goals += $(foreach t, $(.tests), $(.test.deps.$t))

 makefile-targets := build.ninja ctags TAGS cscope dist clean uninstall
 # "ninja -t targets" also lists all prerequisites.  If build system
@ -206,11 +199,14 @@ recurse-clean: $(addsuffix /clean, $(ROM_DIRS))
 clean: recurse-clean
 	-$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean || :
 	-$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) clean-ctlist || :
+# avoid old build problems by removing potentially incorrect old files
+	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	find . \( -name '*.so' -o -name '*.dll' -o -name '*.[oda]' \) -type f \
 		! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \
 		! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \
 		-exec rm {} +
-	rm -f TAGS cscope.* *~ */*~
+	rm -f TAGS cscope.* *.pod *~ */*~
+	rm -f fsdev/*.pod scsi/*.pod

 VERSION = $(shell cat $(SRC_PATH)/VERSION)

@ -221,10 +217,10 @@ qemu-%.tar.bz2:

 distclean: clean
 	-$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || :
-	rm -f config-host.mak
+	rm -f config-host.mak config-host.h* config-poison.h
 	rm -f tests/tcg/config-*.mak
-	rm -f config.status
-	rm -f roms/seabios/config.mak
+	rm -f config-all-disas.mak config.status
+	rm -f roms/seabios/config.mak roms/vgabios/config.mak
 	rm -f qemu-plugins-ld.symbols qemu-plugins-ld64.symbols
 	rm -f *-config-target.h *-config-devices.mak *-config-devices.h
 	rm -rf meson-private meson-logs meson-info compile_commands.json
@ -233,8 +229,7 @@ distclean: clean
 	rm -f linux-headers/asm
 	rm -Rf .sdk

-find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \
-	-type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)
+find-src-path = find "$(SRC_PATH)/" -path "$(SRC_PATH)/meson" -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)

 .PHONY: ctags
 ctags:
@ -255,7 +250,7 @@ gtags:
 		"GTAGS", "Remove old $@ files")
 	$(call quiet-command, 				\
 	        (cd $(SRC_PATH) && 			\
-		 $(find-src-path) -print | gtags -f -),	\
+		 $(find-src-path) | gtags -f -), 	\
 		"GTAGS", "Re-index $(SRC_PATH)")

 .PHONY: TAGS
@ -285,7 +280,6 @@ cscope:
 # Needed by "meson install"
 export DESTDIR

-include $(SRC_PATH)/tests/lcitool/Makefile.include
 include $(SRC_PATH)/tests/docker/Makefile.include
 include $(SRC_PATH)/tests/vm/Makefile.include

@ -315,7 +309,6 @@ endif
 	@echo  'Test targets:'
 	$(call print-help,check,Run all tests (check-help for details))
 	$(call print-help,bench,Run all benchmarks)
-	$(call print-help,lcitool-help,Help about targets for managing build environment manifests)
 	$(call print-help,docker-help,Help about targets running tests inside containers)
 	$(call print-help,vm-help,Help about targets running tests inside VM)
 	@echo  ''
--- a/README.rst
+++ b/README.rst
@ -59,9 +59,9 @@ of other UNIX targets. The simple steps to build QEMU are:

 Additional information can also be found online via the QEMU website:

-* `<https://wiki.qemu.org/Hosts/Linux>`_
-* `<https://wiki.qemu.org/Hosts/Mac>`_
-* `<https://wiki.qemu.org/Hosts/W32>`_
+* `<https://qemu.org/Hosts/Linux>`_
+* `<https://qemu.org/Hosts/Mac>`_
+* `<https://qemu.org/Hosts/W32>`_


 Submitting patches
@ -84,8 +84,8 @@ the Developers Guide.
 Additional information on submitting patches can be found online via
 the QEMU website

-* `<https://wiki.qemu.org/Contribute/SubmitAPatch>`_
-* `<https://wiki.qemu.org/Contribute/TrivialPatches>`_
+* `<https://qemu.org/Contribute/SubmitAPatch>`_
+* `<https://qemu.org/Contribute/TrivialPatches>`_

 The QEMU website is also maintained under source control.

@ -144,7 +144,7 @@ reported via GitLab.

 For additional information on bug reporting consult:

-* `<https://wiki.qemu.org/Contribute/ReportABug>`_
+* `<https://qemu.org/Contribute/ReportABug>`_


 ChangeLog
@ -168,4 +168,4 @@ main methods being email and IRC
 Information on additional methods of contacting the community can be
 found online via the QEMU website:

-* `<https://wiki.qemu.org/Contribute/StartHere>`_
+* `<https://qemu.org/Contribute/StartHere>`_
--- a/2
+++ b/2
@ -1 +1 @@
-6.2.50
+6.1.1
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@ -60,10 +60,6 @@

 HVFState *hvf_state;

-#ifdef __aarch64__
-#define HV_VM_DEFAULT NULL
-#endif
-
 /* Memory slots */

 hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
@ -122,7 +118,6 @@ static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
    MemoryRegion *area = section->mr;
    bool writeable = !area->readonly && !area->rom_device;
    hv_memory_flags_t flags;
-    uint64_t page_size = qemu_real_host_page_size;

    if (!memory_region_is_ram(area)) {
        if (writeable) {
@ -136,12 +131,6 @@ static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
        }
    }

-    if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) ||
-        !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) {
-        /* Not page aligned, so we can not map as RAM */
-        add = false;
-    }
-
    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));
@ -250,12 +239,12 @@ static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
-                      HV_MEMORY_READ | HV_MEMORY_EXEC);
+                      HV_MEMORY_READ);
    /* stop tracking region*/
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
-                      HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
+                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
 }

@ -302,7 +291,6 @@ static void hvf_region_del(MemoryListener *listener,
 }

 static MemoryListener hvf_memory_listener = {
-    .name = "hvf",
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
@ -328,7 +316,7 @@ static int hvf_accel_init(MachineState *ms)

    s = g_new0(HVFState, 1);

-    s->num_slots = ARRAY_SIZE(s->slots);
+    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
@ -336,8 +324,7 @@ static int hvf_accel_init(MachineState *ms)

    hvf_state = s;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
-
-    return hvf_arch_init();
+    return 0;
 }

 static void hvf_accel_class_init(ObjectClass *oc, void *data)
@ -378,20 +365,17 @@ static int hvf_init_vcpu(CPUState *cpu)
    cpu->hvf = g_malloc0(sizeof(*cpu->hvf));

    /* init cpu signals */
+    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

-    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
-    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
+    pthread_sigmask(SIG_BLOCK, NULL, &set);
+    sigdelset(&set, SIG_IPI);

-#ifdef __aarch64__
-    r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
-#else
    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT);
-#endif
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

@ -467,7 +451,6 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
    AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);

    ops->create_vcpu_thread = hvf_start_vcpu_thread;
-    ops->kick_vcpu_thread = hvf_kick_vcpu_thread;

    ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset;
    ops->synchronize_post_init = hvf_cpu_synchronize_post_init;
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@ -61,10 +61,6 @@
 #endif
 #define PAGE_SIZE qemu_real_host_page_size

-#ifndef KVM_GUESTDBG_BLOCKIRQ
-#define KVM_GUESTDBG_BLOCKIRQ 0
-#endif
-
 //#define DEBUG_KVM

 #ifdef DEBUG_KVM
@ -172,8 +168,6 @@ bool kvm_vm_attributes_allowed;
 bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
-bool kvm_has_guest_debug;
-int kvm_sstep_flags;
 static bool kvm_immediate_exit;
 static hwaddr kvm_max_slot_size = ~0;

@ -475,7 +469,6 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
    cpu->kvm_fd = ret;
    cpu->kvm_state = s;
    cpu->vcpu_dirty = true;
-    cpu->dirty_pages = 0;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
@ -750,7 +743,6 @@ static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu)
        count++;
    }
    cpu->kvm_fetch_index = fetch;
-    cpu->dirty_pages += count;

    return count;
 }
@ -1137,7 +1129,6 @@ static void kvm_coalesce_pio_del(MemoryListener *listener,
 }

 static MemoryListener kvm_coalesced_pio_listener = {
-    .name = "kvm-coalesced-pio",
    .coalesced_io_add = kvm_coalesce_pio_add,
    .coalesced_io_del = kvm_coalesce_pio_del,
 };
@ -1642,7 +1633,7 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
 }

 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
-                                  AddressSpace *as, int as_id, const char *name)
+                                  AddressSpace *as, int as_id)
 {
    int i;

@ -1658,7 +1649,6 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
    kml->listener.log_start = kvm_log_start;
    kml->listener.log_stop = kvm_log_stop;
    kml->listener.priority = 10;
-    kml->listener.name = name;

    if (s->kvm_dirty_ring_size) {
        kml->listener.log_sync_global = kvm_log_sync_global;
@ -1679,7 +1669,6 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
 }

 static MemoryListener kvm_io_listener = {
-    .name = "kvm-io",
    .eventfd_add = kvm_io_ioeventfd_add,
    .eventfd_del = kvm_io_ioeventfd_del,
    .priority = 10,
@ -2304,11 +2293,6 @@ bool kvm_vcpu_id_is_valid(int vcpu_id)
    return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
 }

-bool kvm_dirty_ring_enabled(void)
-{
-    return kvm_state->kvm_dirty_ring_size ? true : false;
-}
-
 static int kvm_init(MachineState *ms)
 {
    MachineClass *mc = MACHINE_GET_CLASS(ms);
@ -2485,7 +2469,7 @@ static int kvm_init(MachineState *ms)
            ret = kvm_vm_enable_cap(s, KVM_CAP_DIRTY_LOG_RING, 0, ring_bytes);
            if (ret) {
                error_report("Enabling of KVM dirty ring failed: %s. "
-                             "Suggested minimum value is 1024.", strerror(-ret));
+                             "Suggested mininum value is 1024.", strerror(-ret));
                goto err;
            }

@ -2570,25 +2554,6 @@ static int kvm_init(MachineState *ms)
    kvm_ioeventfd_any_length_allowed =
        (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);

-#ifdef KVM_CAP_SET_GUEST_DEBUG
-    kvm_has_guest_debug =
-        (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0);
-#endif
-
-    kvm_sstep_flags = 0;
-    if (kvm_has_guest_debug) {
-        kvm_sstep_flags = SSTEP_ENABLE;
-
-#if defined KVM_CAP_SET_GUEST_DEBUG2
-        int guest_debug_flags =
-            kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG2);
-
-        if (guest_debug_flags & KVM_GUESTDBG_BLOCKIRQ) {
-            kvm_sstep_flags |= SSTEP_NOIRQ;
-        }
-#endif
-    }
-
    kvm_state = s;

    ret = kvm_arch_init(ms, s);
@ -2614,7 +2579,7 @@ static int kvm_init(MachineState *ms)
    s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region;

    kvm_memory_listener_register(s, &s->memory_listener,
-                                 &address_space_memory, 0, "kvm-memory");
+                                 &address_space_memory, 0);
    if (kvm_eventfds_allowed) {
        memory_listener_register(&kvm_io_listener,
                                 &address_space_io);
@ -3218,10 +3183,6 @@ int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)

    if (cpu->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
-
-        if (cpu->singlestep_enabled & SSTEP_NOIRQ) {
-            data.dbg.control |= KVM_GUESTDBG_BLOCKIRQ;
-        }
    }
    kvm_arch_update_guest_debug(cpu, &data.dbg);

--- a/accel/kvm/meson.build
+++ b/accel/kvm/meson.build
@ -3,5 +3,6 @@ kvm_ss.add(files(
  'kvm-all.c',
  'kvm-accel-ops.c',
 ))
+kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))

 specific_ss.add_all(when: 'CONFIG_KVM', if_true: kvm_ss)
--- a/target/i386/kvm/sev-stub.c
+++ b/target/i386/kvm/sev-stub.c
@ -13,7 +13,7 @@

 #include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "sev.h"
+#include "sysemu/sev.h"

 int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 {
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@ -147,9 +147,4 @@ bool kvm_arm_supports_user_irq(void)
 {
    return false;
 }
-
-bool kvm_dirty_ring_enabled(void)
-{
-    return false;
-}
 #endif
--- a/accel/tcg/atomic_common.c.inc
+++ b/accel/tcg/atomic_common.c.inc
@ -13,43 +13,56 @@
 * See the COPYING file in the top-level directory.
 */

-static void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi)
+static uint16_t atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr,
+                                     TCGMemOpIdx oi)
 {
    CPUState *cpu = env_cpu(env);
+    uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), false);

-    trace_guest_rmw_before_exec(cpu, addr, oi);
+    trace_guest_mem_before_exec(cpu, addr, info);
+    trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST);
+
+    return info;
 }

 static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
-                                  MemOpIdx oi)
+                                  uint16_t info)
 {
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info | TRACE_MEM_ST);
 }

 #if HAVE_ATOMIC128
-static void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
-                                MemOpIdx oi)
+static uint16_t atomic_trace_ld_pre(CPUArchState *env, target_ulong addr,
+                                    TCGMemOpIdx oi)
 {
-    trace_guest_ld_before_exec(env_cpu(env), addr, oi);
+    uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), false);
+
+    trace_guest_mem_before_exec(env_cpu(env), addr, info);
+
+    return info;
 }

 static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi)
+                                 uint16_t info)
 {
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
 }

-static void atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
-                                MemOpIdx oi)
+static uint16_t atomic_trace_st_pre(CPUArchState *env, target_ulong addr,
+                                    TCGMemOpIdx oi)
 {
-    trace_guest_st_before_exec(env_cpu(env), addr, oi);
+    uint16_t info = trace_mem_get_info(get_memop(oi), get_mmuidx(oi), true);
+
+    trace_guest_mem_before_exec(env_cpu(env), addr, info);
+
+    return info;
 }

 static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi)
+                                 uint16_t info)
 {
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info);
 }
 #endif

--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@ -19,6 +19,7 @@
 */

 #include "qemu/plugin.h"
+#include "trace/mem.h"

 #if DATA_SIZE == 16
 # define SUFFIX     o
@ -71,77 +72,77 @@

 ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
                              ABI_TYPE cmpv, ABI_TYPE newv,
-                              MemOpIdx oi, uintptr_t retaddr)
+                              TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_READ | PAGE_WRITE, retaddr);
    DATA_TYPE ret;
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);

-    atomic_trace_rmw_pre(env, addr, oi);
 #if DATA_SIZE == 16
    ret = atomic16_cmpxchg(haddr, cmpv, newv);
 #else
    ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
 #endif
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr, info);
    return ret;
 }

 #if DATA_SIZE >= 16
 #if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
-                         MemOpIdx oi, uintptr_t retaddr)
+                         TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_READ, retaddr);
    DATA_TYPE val;
+    uint16_t info = atomic_trace_ld_pre(env, addr, oi);

-    atomic_trace_ld_pre(env, addr, oi);
    val = atomic16_read(haddr);
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_ld_post(env, addr, oi);
+    atomic_trace_ld_post(env, addr, info);
    return val;
 }

 void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
-                     MemOpIdx oi, uintptr_t retaddr)
+                     TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_WRITE, retaddr);
+    uint16_t info = atomic_trace_st_pre(env, addr, oi);

-    atomic_trace_st_pre(env, addr, oi);
    atomic16_set(haddr, val);
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_st_post(env, addr, oi);
+    atomic_trace_st_post(env, addr, info);
 }
 #endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
-                           MemOpIdx oi, uintptr_t retaddr)
+                           TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_READ | PAGE_WRITE, retaddr);
    DATA_TYPE ret;
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);

-    atomic_trace_rmw_pre(env, addr, oi);
    ret = qatomic_xchg__nocheck(haddr, val);
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr, info);
    return ret;
 }

 #define GEN_ATOMIC_HELPER(X)                                        \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
-                        ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
+                        ABI_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) \
 {                                                                   \
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,  \
                                         PAGE_READ | PAGE_WRITE, retaddr); \
    DATA_TYPE ret;                                                  \
-    atomic_trace_rmw_pre(env, addr, oi);                            \
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);            \
    ret = qatomic_##X(haddr, val);                                  \
    ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr, info);                         \
    return ret;                                                     \
 }

@ -166,12 +167,12 @@ GEN_ATOMIC_HELPER(xor_fetch)
 */
 #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
-                        ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
+                        ABI_TYPE xval, TCGMemOpIdx oi, uintptr_t retaddr) \
 {                                                                   \
    XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
                                          PAGE_READ | PAGE_WRITE, retaddr); \
    XDATA_TYPE cmp, old, new, val = xval;                           \
-    atomic_trace_rmw_pre(env, addr, oi);                            \
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);            \
    smp_mb();                                                       \
    cmp = qatomic_read__nocheck(haddr);                             \
    do {                                                            \
@ -179,7 +180,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
        cmp = qatomic_cmpxchg__nocheck(haddr, old, new);            \
    } while (cmp != old);                                           \
    ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr, info);                         \
    return RET;                                                     \
 }

@ -210,78 +211,78 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)

 ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
                              ABI_TYPE cmpv, ABI_TYPE newv,
-                              MemOpIdx oi, uintptr_t retaddr)
+                              TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_READ | PAGE_WRITE, retaddr);
    DATA_TYPE ret;
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);

-    atomic_trace_rmw_pre(env, addr, oi);
 #if DATA_SIZE == 16
    ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
 #else
    ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
 #endif
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr, info);
    return BSWAP(ret);
 }

 #if DATA_SIZE >= 16
 #if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
-                         MemOpIdx oi, uintptr_t retaddr)
+                         TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_READ, retaddr);
    DATA_TYPE val;
+    uint16_t info = atomic_trace_ld_pre(env, addr, oi);

-    atomic_trace_ld_pre(env, addr, oi);
    val = atomic16_read(haddr);
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_ld_post(env, addr, oi);
+    atomic_trace_ld_post(env, addr, info);
    return BSWAP(val);
 }

 void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
-                     MemOpIdx oi, uintptr_t retaddr)
+                     TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_WRITE, retaddr);
+    uint16_t info = atomic_trace_st_pre(env, addr, oi);

-    atomic_trace_st_pre(env, addr, oi);
    val = BSWAP(val);
    atomic16_set(haddr, val);
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_st_post(env, addr, oi);
+    atomic_trace_st_post(env, addr, info);
 }
 #endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
-                           MemOpIdx oi, uintptr_t retaddr)
+                           TCGMemOpIdx oi, uintptr_t retaddr)
 {
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
                                         PAGE_READ | PAGE_WRITE, retaddr);
    ABI_TYPE ret;
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);

-    atomic_trace_rmw_pre(env, addr, oi);
    ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
    ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr, info);
    return BSWAP(ret);
 }

 #define GEN_ATOMIC_HELPER(X)                                        \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
-                        ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
+                        ABI_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) \
 {                                                                   \
    DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,  \
                                         PAGE_READ | PAGE_WRITE, retaddr); \
    DATA_TYPE ret;                                                  \
-    atomic_trace_rmw_pre(env, addr, oi);                            \
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);            \
    ret = qatomic_##X(haddr, BSWAP(val));                           \
    ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr, info);                         \
    return BSWAP(ret);                                              \
 }

@ -303,12 +304,12 @@ GEN_ATOMIC_HELPER(xor_fetch)
 */
 #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
 ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
-                        ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
+                        ABI_TYPE xval, TCGMemOpIdx oi, uintptr_t retaddr) \
 {                                                                   \
    XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
                                          PAGE_READ | PAGE_WRITE, retaddr); \
    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
-    atomic_trace_rmw_pre(env, addr, oi);                            \
+    uint16_t info = atomic_trace_rmw_pre(env, addr, oi);            \
    smp_mb();                                                       \
    ldn = qatomic_read__nocheck(haddr);                             \
    do {                                                            \
@ -316,7 +317,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
        ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));     \
    } while (ldo != ldn);                                           \
    ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr, info);                         \
    return RET;                                                     \
 }

--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@ -20,9 +20,6 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/qemu-print.h"
-#include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
-#include "qapi/type-helpers.h"
 #include "hw/core/tcg-cpu-ops.h"
 #include "trace.h"
 #include "disas/disas.h"
@ -41,7 +38,6 @@
 #include "exec/cpu-all.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/replay.h"
-#include "sysemu/tcg.h"
 #include "exec/helper-proto.h"
 #include "tb-hash.h"
 #include "tb-context.h"
@ -387,17 +383,6 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
            cc->set_pc(cpu, last_tb->pc);
        }
    }
-
-    /*
-     * If gdb single-step, and we haven't raised another exception,
-     * raise a debug exception.  Single-step with another exception
-     * is handled in cpu_handle_exception.
-     */
-    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
-        cpu->exception_index = EXCP_DEBUG;
-        cpu_loop_exit(cpu);
-    }
-
    return last_tb;
 }

@ -466,7 +451,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
         * memory.
         */
 #ifndef CONFIG_SOFTMMU
-        clear_helper_retaddr();
        tcg_debug_assert(!have_mmap_lock());
 #endif
        if (qemu_mutex_iothread_locked()) {
@ -476,6 +460,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
        qemu_plugin_disable_mem_helpers(cpu);
    }

+
    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
@ -603,9 +588,8 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,

 static inline bool cpu_handle_halt(CPUState *cpu)
 {
-#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
-#if defined(TARGET_I386)
+#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
@ -613,14 +597,13 @@ static inline bool cpu_handle_halt(CPUState *cpu)
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
-#endif /* TARGET_I386 */
+#endif
        if (!cpu_has_work(cpu)) {
            return true;
        }

        cpu->halted = 0;
    }
-#endif /* !CONFIG_USER_ONLY */

    return false;
 }
@ -668,8 +651,8 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
           loop */
 #if defined(TARGET_I386)
        CPUClass *cc = CPU_GET_CLASS(cpu);
-        cc->tcg_ops->fake_user_interrupt(cpu);
-#endif /* TARGET_I386 */
+        cc->tcg_ops->do_interrupt(cpu);
+#endif
        *ret = cpu->exception_index;
        cpu->exception_index = -1;
        return true;
@ -702,7 +685,6 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
    return false;
 }

-#ifndef CONFIG_USER_ONLY
 /*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later. It does not need to be recorded for
@ -716,19 +698,11 @@ static inline bool need_replay_interrupt(int interrupt_request)
    return true;
 #endif
 }
-#endif /* !CONFIG_USER_ONLY */

 static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
 {
-    /*
-     * If we have requested custom cflags with CF_NOIRQ we should
-     * skip checking here. Any pending interrupts will get picked up
-     * by the next TB we execute under normal cflags.
-     */
-    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
-        return false;
-    }
+    CPUClass *cc = CPU_GET_CLASS(cpu);

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
@ -751,7 +725,6 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
            qemu_mutex_unlock_iothread();
            return true;
        }
-#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
@ -780,14 +753,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
            qemu_mutex_unlock_iothread();
            return true;
        }
-#endif /* !TARGET_I386 */
+#endif
        /* The target hook has 3 exit conditions:
           False when the interrupt isn't processed,
           True when it is, and we should restart on a new TB,
           and via longjmp via cpu_loop_exit.  */
        else {
-            CPUClass *cc = CPU_GET_CLASS(cpu);
-
            if (cc->tcg_ops->cpu_exec_interrupt &&
                cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (need_replay_interrupt(interrupt_request)) {
@ -806,7 +777,6 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
-#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
@ -929,7 +899,6 @@ int cpu_exec(CPUState *cpu)
 #endif

 #ifndef CONFIG_SOFTMMU
-        clear_helper_retaddr();
        tcg_debug_assert(!have_mmap_lock());
 #endif
        if (qemu_mutex_iothread_locked()) {
@ -1042,83 +1011,23 @@ void tcg_exec_unrealizefn(CPUState *cpu)

 #ifndef CONFIG_USER_ONLY

-void dump_drift_info(GString *buf)
+void dump_drift_info(void)
 {
    if (!icount_enabled()) {
        return;
    }

-    g_string_append_printf(buf, "Host - Guest clock  %"PRIi64" ms\n",
-                           (cpu_get_clock() - icount_get()) / SCALE_MS);
+    qemu_printf("Host - Guest clock  %"PRIi64" ms\n",
+                (cpu_get_clock() - icount_get()) / SCALE_MS);
    if (icount_align_option) {
-        g_string_append_printf(buf, "Max guest delay     %"PRIi64" ms\n",
-                               -max_delay / SCALE_MS);
-        g_string_append_printf(buf, "Max guest advance   %"PRIi64" ms\n",
-                               max_advance / SCALE_MS);
+        qemu_printf("Max guest delay     %"PRIi64" ms\n",
+                    -max_delay / SCALE_MS);
+        qemu_printf("Max guest advance   %"PRIi64" ms\n",
+                    max_advance / SCALE_MS);
    } else {
-        g_string_append_printf(buf, "Max guest delay     NA\n");
-        g_string_append_printf(buf, "Max guest advance   NA\n");
+        qemu_printf("Max guest delay     NA\n");
+        qemu_printf("Max guest advance   NA\n");
    }
 }

-HumanReadableText *qmp_x_query_jit(Error **errp)
-{
-    g_autoptr(GString) buf = g_string_new("");
-
-    if (!tcg_enabled()) {
-        error_setg(errp, "JIT information is only available with accel=tcg");
-        return NULL;
-    }
-
-    dump_exec_info(buf);
-    dump_drift_info(buf);
-
-    return human_readable_text_from_str(buf);
-}
-
-HumanReadableText *qmp_x_query_opcount(Error **errp)
-{
-    g_autoptr(GString) buf = g_string_new("");
-
-    if (!tcg_enabled()) {
-        error_setg(errp, "Opcode count information is only available with accel=tcg");
-        return NULL;
-    }
-
-    dump_opcount_info(buf);
-
-    return human_readable_text_from_str(buf);
-}
-
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-    g_autoptr(GString) buf = g_string_new("");
-    static int64_t last_cpu_exec_time;
-    int64_t cpu_exec_time;
-    int64_t delta;
-
-    cpu_exec_time = tcg_cpu_exec_time();
-    delta = cpu_exec_time - last_cpu_exec_time;
-
-    g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-                           dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
-    g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-                           delta, delta / (double)NANOSECONDS_PER_SECOND);
-    last_cpu_exec_time = cpu_exec_time;
-    dev_time = 0;
-
-    return human_readable_text_from_str(buf);
-}
-#else
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-    error_setg(errp, "Internal profiler not compiled");
-    return NULL;
-}
-#endif
-
 #endif /* !CONFIG_USER_ONLY */
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@ -34,12 +34,12 @@
 #include "qemu/atomic128.h"
 #include "exec/translate-all.h"
 #include "trace/trace-root.h"
+#include "trace/mem.h"
 #include "tb-hash.h"
 #include "internal.h"
 #ifdef CONFIG_PLUGIN
 #include "qemu/plugin-memory.h"
 #endif
-#include "tcg/tcg-ldst.h"

 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@ -1749,7 +1749,7 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
 * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
 */
 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
-                               MemOpIdx oi, int size, int prot,
+                               TCGMemOpIdx oi, int size, int prot,
                               uintptr_t retaddr)
 {
    size_t mmu_idx = get_mmuidx(oi);
@ -1840,25 +1840,6 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
 }

-/*
- * Verify that we have passed the correct MemOp to the correct function.
- *
- * In the case of the helper_*_mmu functions, we will have done this by
- * using the MemOp to look up the helper during code generation.
- *
- * In the case of the cpu_*_mmu functions, this is up to the caller.
- * We could present one function to target code, and dispatch based on
- * the MemOp, but so far we have worked hard to avoid an indirect function
- * call along the memory path.
- */
-static void validate_memop(MemOpIdx oi, MemOp expected)
-{
-#ifdef CONFIG_DEBUG_TCG
-    MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
-    assert(have == expected);
-#endif
-}
-
 /*
 * Load Helpers
 *
@ -1869,7 +1850,7 @@ static void validate_memop(MemOpIdx oi, MemOp expected)
 */

 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
-                                MemOpIdx oi, uintptr_t retaddr);
+                                TCGMemOpIdx oi, uintptr_t retaddr);

 static inline uint64_t QEMU_ALWAYS_INLINE
 load_memop(const void *haddr, MemOp op)
@ -1885,9 +1866,9 @@ load_memop(const void *haddr, MemOp op)
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
-    case MO_BEUQ:
+    case MO_BEQ:
        return ldq_be_p(haddr);
-    case MO_LEUQ:
+    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
@ -1895,7 +1876,7 @@ load_memop(const void *haddr, MemOp op)
 }

 static inline uint64_t QEMU_ALWAYS_INLINE
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
 {
@ -2010,87 +1991,80 @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
 */

 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
-                              MemOpIdx oi, uintptr_t retaddr)
+                              TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_UB);
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
 }

 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
-                                     MemOpIdx oi, uintptr_t retaddr)
+                                     TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return full_ldub_mmu(env, addr, oi, retaddr);
 }

 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
+                                 TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_LEUW);
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
 }

 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return full_le_lduw_mmu(env, addr, oi, retaddr);
 }

 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
+                                 TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_BEUW);
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
 }

 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return full_be_lduw_mmu(env, addr, oi, retaddr);
 }

 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
+                                 TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_LEUL);
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
 }

 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return full_le_ldul_mmu(env, addr, oi, retaddr);
 }

 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
-                                 MemOpIdx oi, uintptr_t retaddr)
+                                 TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_BEUL);
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
 }

 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return full_be_ldul_mmu(env, addr, oi, retaddr);
 }

 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
-                           MemOpIdx oi, uintptr_t retaddr)
+                           TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_LEUQ);
-    return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
+    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
 }

 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
-                           MemOpIdx oi, uintptr_t retaddr)
+                           TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_BEUQ);
-    return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
+    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
 }

@ -2101,31 +2075,31 @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,


 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
-                                     MemOpIdx oi, uintptr_t retaddr)
+                                     TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
 }

 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
 }

 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
 }

 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
 }

 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
-                                    MemOpIdx oi, uintptr_t retaddr)
+                                    TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
 }
@ -2135,56 +2109,193 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
 */

 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
-                                       MemOpIdx oi, uintptr_t retaddr,
-                                       FullLoadHelper *full_load)
+                                       int mmu_idx, uintptr_t retaddr,
+                                       MemOp op, FullLoadHelper *full_load)
 {
+    uint16_t meminfo;
+    TCGMemOpIdx oi;
    uint64_t ret;

-    trace_guest_ld_before_exec(env_cpu(env), addr, oi);
+    meminfo = trace_mem_get_info(op, mmu_idx, false);
+    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
+
+    op &= ~MO_SIGN;
+    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
+
    return ret;
 }

-uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
+uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                            int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
 }

-uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
-                        MemOpIdx oi, uintptr_t ra)
+int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                       int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
+    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
+                                   full_ldub_mmu);
 }

-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
-                        MemOpIdx oi, uintptr_t ra)
+uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                               int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
 }

-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
-                        MemOpIdx oi, uintptr_t ra)
+int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                          int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, MO_BEUQ, helper_be_ldq_mmu);
+    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
+                                    full_be_lduw_mmu);
 }

-uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
-                        MemOpIdx oi, uintptr_t ra)
+uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
 }

-uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
-                        MemOpIdx oi, uintptr_t ra)
+uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
 }

-uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
-                        MemOpIdx oi, uintptr_t ra)
+uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                               int mmu_idx, uintptr_t ra)
 {
-    return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
+}
+
+int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                          int mmu_idx, uintptr_t ra)
+{
+    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
+                                    full_le_lduw_mmu);
+}
+
+uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
+{
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
+}
+
+uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
+                              int mmu_idx, uintptr_t ra)
+{
+    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
+}
+
+uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
+                          uintptr_t retaddr)
+{
+    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+{
+    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
+                             uintptr_t retaddr)
+{
+    return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+{
+    return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
+{
+    return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
+{
+    return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
+                             uintptr_t retaddr)
+{
+    return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
+{
+    return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
+{
+    return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
+                            uintptr_t retaddr)
+{
+    return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
+}
+
+uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldub_data_ra(env, ptr, 0);
+}
+
+int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldsb_data_ra(env, ptr, 0);
+}
+
+uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_lduw_be_data_ra(env, ptr, 0);
+}
+
+int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldsw_be_data_ra(env, ptr, 0);
+}
+
+uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldl_be_data_ra(env, ptr, 0);
+}
+
+uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldq_be_data_ra(env, ptr, 0);
+}
+
+uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_lduw_le_data_ra(env, ptr, 0);
+}
+
+int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldsw_le_data_ra(env, ptr, 0);
+}
+
+uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldl_le_data_ra(env, ptr, 0);
+}
+
+uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
+{
+    return cpu_ldq_le_data_ra(env, ptr, 0);
 }

 /*
@ -2210,10 +2321,10 @@ store_memop(void *haddr, uint64_t val, MemOp op)
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
-    case MO_BEUQ:
+    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
-    case MO_LEUQ:
+    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
@ -2221,9 +2332,6 @@ store_memop(void *haddr, uint64_t val, MemOp op)
    }
 }

-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                         MemOpIdx oi, uintptr_t retaddr);
-
 static void __attribute__((noinline))
 store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
                       uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
@ -2233,7 +2341,7 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
    uintptr_t index, index2;
    CPUTLBEntry *entry, *entry2;
    target_ulong page2, tlb_addr, tlb_addr2;
-    MemOpIdx oi;
+    TCGMemOpIdx oi;
    size_t size2;
    int i;

@ -2287,20 +2395,20 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
        for (i = 0; i < size; ++i) {
            /* Big-endian extract.  */
            uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
-            full_stb_mmu(env, addr + i, val8, oi, retaddr);
+            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    } else {
        for (i = 0; i < size; ++i) {
            /* Little-endian extract.  */
            uint8_t val8 = val >> (i * 8);
-            full_stb_mmu(env, addr + i, val8, oi, retaddr);
+            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    }
 }

 static inline void QEMU_ALWAYS_INLINE
 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
-             MemOpIdx oi, uintptr_t retaddr, MemOp op)
+             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
 {
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
@ -2396,145 +2504,187 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
    store_memop(haddr, val, op);
 }

-static void __attribute__((noinline))
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-             MemOpIdx oi, uintptr_t retaddr)
+void __attribute__((noinline))
+helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
+                   TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_UB);
    store_helper(env, addr, val, oi, retaddr, MO_UB);
 }

-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
-                        MemOpIdx oi, uintptr_t retaddr)
+void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                       TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    full_stb_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_LEUW);
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
 }

-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
-                       MemOpIdx oi, uintptr_t retaddr)
+void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                       TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    full_le_stw_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_BEUW);
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
 }

-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
-                       MemOpIdx oi, uintptr_t retaddr)
+void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                       TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    full_be_stw_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_LEUL);
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
 }

-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                       MemOpIdx oi, uintptr_t retaddr)
+void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                       TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    full_le_stl_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                            MemOpIdx oi, uintptr_t retaddr)
-{
-    validate_memop(oi, MO_BEUL);
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
 }

-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                       MemOpIdx oi, uintptr_t retaddr)
-{
-    full_be_stl_mmu(env, addr, val, oi, retaddr);
-}
-
 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                       MemOpIdx oi, uintptr_t retaddr)
+                       TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_LEUQ);
-    store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
+    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
 }

 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                       MemOpIdx oi, uintptr_t retaddr)
+                       TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    validate_memop(oi, MO_BEUQ);
-    store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
+    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
 }

 /*
 * Store Helpers for cpu_ldst.h
 */

-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
-                             uint64_t val, MemOpIdx oi, uintptr_t retaddr);
-
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
-                                    uint64_t val, MemOpIdx oi, uintptr_t ra,
-                                    FullStoreHelper *full_store)
+static inline void QEMU_ALWAYS_INLINE
+cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
+                 int mmu_idx, uintptr_t retaddr, MemOp op)
 {
-    trace_guest_st_before_exec(env_cpu(env), addr, oi);
-    full_store(env, addr, val, oi, ra);
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+    TCGMemOpIdx oi;
+    uint16_t meminfo;
+
+    meminfo = trace_mem_get_info(op, mmu_idx, true);
+    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
+
+    oi = make_memop_idx(op, mmu_idx);
+    store_helper(env, addr, val, oi, retaddr, op);
+
+    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
 }

-void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
-                 MemOpIdx oi, uintptr_t retaddr)
+void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                       int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
 }

-void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
-                    MemOpIdx oi, uintptr_t retaddr)
+void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
 }

-void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                    MemOpIdx oi, uintptr_t retaddr)
+void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
 }

-void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                    MemOpIdx oi, uintptr_t retaddr)
+void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
 }

-void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
-                    MemOpIdx oi, uintptr_t retaddr)
+void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
 }

-void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                    MemOpIdx oi, uintptr_t retaddr)
+void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
 }

-void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                    MemOpIdx oi, uintptr_t retaddr)
+void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
+                          int mmu_idx, uintptr_t retaddr)
 {
-    cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
+    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
 }

-#include "ldst_common.c.inc"
+void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
+                     uint32_t val, uintptr_t retaddr)
+{
+    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint64_t val, uintptr_t retaddr)
+{
+    cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint32_t val, uintptr_t retaddr)
+{
+    cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
+                        uint64_t val, uintptr_t retaddr)
+{
+    cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
+}
+
+void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stb_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stw_be_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stl_be_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
+{
+    cpu_stq_be_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stw_le_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
+{
+    cpu_stl_le_data_ra(env, ptr, val, 0);
+}
+
+void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
+{
+    cpu_stq_le_data_ra(env, ptr, val, 0);
+}

 /*
 * First set of functions passes in OI and RETADDR.
@ -2571,49 +2721,49 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 /* Code access functions.  */

 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
-                               MemOpIdx oi, uintptr_t retaddr)
+                               TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
 }

 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
+    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
 }

 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
-                               MemOpIdx oi, uintptr_t retaddr)
+                               TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
 }

 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
+    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
 }

 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
-                              MemOpIdx oi, uintptr_t retaddr)
+                              TCGMemOpIdx oi, uintptr_t retaddr)
 {
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
 }

 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
+    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
 }

 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
-                              MemOpIdx oi, uintptr_t retaddr)
+                              TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
+    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
 }

 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
+    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
 }
--- a/accel/tcg/hmp.c
+++ b/accel/tcg/hmp.c
@ -1,15 +1,29 @@
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
-#include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
 #include "exec/exec-all.h"
 #include "monitor/monitor.h"
 #include "sysemu/tcg.h"

+static void hmp_info_jit(Monitor *mon, const QDict *qdict)
+{
+    if (!tcg_enabled()) {
+        error_report("JIT information is only available with accel=tcg");
+        return;
+    }
+
+    dump_exec_info();
+    dump_drift_info();
+}
+
+static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
+{
+    dump_opcount_info();
+}
+
 static void hmp_tcg_register(void)
 {
-    monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);
-    monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount);
+    monitor_register_hmp("jit", true, hmp_info_jit);
+    monitor_register_hmp("opcount", true, hmp_info_opcount);
 }

 type_init(hmp_tcg_register);
--- a/accel/tcg/ldst_common.c.inc
+++ b/accel/tcg/ldst_common.c.inc
@ -1,307 +0,0 @@
-/*
- * Routines common to user and system emulation of load/store.
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                            int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-    return cpu_ldb_mmu(env, addr, oi, ra);
-}
-
-int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                       int mmu_idx, uintptr_t ra)
-{
-    return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra);
-}
-
-uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                               int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
-    return cpu_ldw_be_mmu(env, addr, oi, ra);
-}
-
-int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                          int mmu_idx, uintptr_t ra)
-{
-    return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra);
-}
-
-uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
-    return cpu_ldl_be_mmu(env, addr, oi, ra);
-}
-
-uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
-    return cpu_ldq_be_mmu(env, addr, oi, ra);
-}
-
-uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                               int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
-    return cpu_ldw_le_mmu(env, addr, oi, ra);
-}
-
-int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                          int mmu_idx, uintptr_t ra)
-{
-    return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra);
-}
-
-uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
-    return cpu_ldl_le_mmu(env, addr, oi, ra);
-}
-
-uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
-    return cpu_ldq_le_mmu(env, addr, oi, ra);
-}
-
-void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                       int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-    cpu_stb_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
-    cpu_stw_be_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
-    cpu_stl_be_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
-                          int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
-    cpu_stq_be_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
-    cpu_stw_le_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
-    cpu_stl_le_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
-                          int mmu_idx, uintptr_t ra)
-{
-    MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
-    cpu_stq_le_mmu(env, addr, val, oi, ra);
-}
-
-/*--------------------------*/
-
-uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_ldub_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return (int8_t)cpu_ldub_data_ra(env, addr, ra);
-}
-
-uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_lduw_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return (int16_t)cpu_lduw_be_data_ra(env, addr, ra);
-}
-
-uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_ldl_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_ldq_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_lduw_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return (int16_t)cpu_lduw_le_data_ra(env, addr, ra);
-}
-
-uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_ldl_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
-    return cpu_ldq_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr,
-                     uint32_t val, uintptr_t ra)
-{
-    cpu_stb_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr,
-                        uint32_t val, uintptr_t ra)
-{
-    cpu_stw_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr,
-                        uint32_t val, uintptr_t ra)
-{
-    cpu_stl_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr,
-                        uint64_t val, uintptr_t ra)
-{
-    cpu_stq_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr,
-                        uint32_t val, uintptr_t ra)
-{
-    cpu_stw_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr,
-                        uint32_t val, uintptr_t ra)
-{
-    cpu_stl_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr,
-                        uint64_t val, uintptr_t ra)
-{
-    cpu_stq_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
-}
-
-/*--------------------------*/
-
-uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_ldub_data_ra(env, addr, 0);
-}
-
-int cpu_ldsb_data(CPUArchState *env, abi_ptr addr)
-{
-    return (int8_t)cpu_ldub_data(env, addr);
-}
-
-uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_lduw_be_data_ra(env, addr, 0);
-}
-
-int cpu_ldsw_be_data(CPUArchState *env, abi_ptr addr)
-{
-    return (int16_t)cpu_lduw_be_data(env, addr);
-}
-
-uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_ldl_be_data_ra(env, addr, 0);
-}
-
-uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_ldq_be_data_ra(env, addr, 0);
-}
-
-uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_lduw_le_data_ra(env, addr, 0);
-}
-
-int cpu_ldsw_le_data(CPUArchState *env, abi_ptr addr)
-{
-    return (int16_t)cpu_lduw_le_data(env, addr);
-}
-
-uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_ldl_le_data_ra(env, addr, 0);
-}
-
-uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr addr)
-{
-    return cpu_ldq_le_data_ra(env, addr, 0);
-}
-
-void cpu_stb_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
-    cpu_stb_data_ra(env, addr, val, 0);
-}
-
-void cpu_stw_be_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
-    cpu_stw_be_data_ra(env, addr, val, 0);
-}
-
-void cpu_stl_be_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
-    cpu_stl_be_data_ra(env, addr, val, 0);
-}
-
-void cpu_stq_be_data(CPUArchState *env, abi_ptr addr, uint64_t val)
-{
-    cpu_stq_be_data_ra(env, addr, val, 0);
-}
-
-void cpu_stw_le_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
-    cpu_stw_le_data_ra(env, addr, val, 0);
-}
-
-void cpu_stl_le_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
-    cpu_stl_le_data_ra(env, addr, val, 0);
-}
-
-void cpu_stq_le_data(CPUArchState *env, abi_ptr addr, uint64_t val)
-{
-    cpu_stq_le_data_ra(env, addr, val, 0);
-}
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@ -10,7 +10,7 @@ tcg_ss.add(files(
 ))
 tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
 tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
-tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
+tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
 specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)

 specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@ -45,6 +45,7 @@
 #include "qemu/osdep.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
+#include "trace/mem.h"
 #include "exec/exec-all.h"
 #include "exec/plugin-gen.h"
 #include "exec/translator.h"
@ -162,7 +163,11 @@ static void gen_empty_mem_helper(void)
 static void gen_plugin_cb_start(enum plugin_gen_from from,
                                enum plugin_gen_cb type, unsigned wr)
 {
+    TCGOp *op;
+
    tcg_gen_plugin_cb_start(from, type, wr);
+    op = tcg_last_op();
+    QSIMPLEQ_INSERT_TAIL(&tcg_ctx->plugin_ops, op, plugin_link);
 }

 static void gen_wrapped(enum plugin_gen_from from,
@ -206,9 +211,9 @@ static void gen_mem_wrapped(enum plugin_gen_cb type,
                            const union mem_gen_fn *f, TCGv addr,
                            uint32_t info, bool is_mem)
 {
-    enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
+    int wr = !!(info & TRACE_MEM_ST);

-    gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
+    gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, wr);
    if (is_mem) {
        f->mem_fn(addr, info);
    } else {
@ -702,6 +707,62 @@ static void plugin_gen_disable_mem_helper(const struct qemu_plugin_tb *ptb,
    inject_mem_disable_helper(insn, begin_op);
 }

+static void plugin_inject_cb(const struct qemu_plugin_tb *ptb, TCGOp *begin_op,
+                             int insn_idx)
+{
+    enum plugin_gen_from from = begin_op->args[0];
+    enum plugin_gen_cb type = begin_op->args[1];
+
+    switch (from) {
+    case PLUGIN_GEN_FROM_TB:
+        switch (type) {
+        case PLUGIN_GEN_CB_UDATA:
+            plugin_gen_tb_udata(ptb, begin_op);
+            return;
+        case PLUGIN_GEN_CB_INLINE:
+            plugin_gen_tb_inline(ptb, begin_op);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    case PLUGIN_GEN_FROM_INSN:
+        switch (type) {
+        case PLUGIN_GEN_CB_UDATA:
+            plugin_gen_insn_udata(ptb, begin_op, insn_idx);
+            return;
+        case PLUGIN_GEN_CB_INLINE:
+            plugin_gen_insn_inline(ptb, begin_op, insn_idx);
+            return;
+        case PLUGIN_GEN_ENABLE_MEM_HELPER:
+            plugin_gen_enable_mem_helper(ptb, begin_op, insn_idx);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    case PLUGIN_GEN_FROM_MEM:
+        switch (type) {
+        case PLUGIN_GEN_CB_MEM:
+            plugin_gen_mem_regular(ptb, begin_op, insn_idx);
+            return;
+        case PLUGIN_GEN_CB_INLINE:
+            plugin_gen_mem_inline(ptb, begin_op, insn_idx);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    case PLUGIN_GEN_AFTER_INSN:
+        switch (type) {
+        case PLUGIN_GEN_DISABLE_MEM_HELPER:
+            plugin_gen_disable_mem_helper(ptb, begin_op, insn_idx);
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    default:
+        g_assert_not_reached();
+    }
+}
+
 /* #define DEBUG_PLUGIN_GEN_OPS */
 static void pr_ops(void)
 {
@ -759,95 +820,21 @@ static void pr_ops(void)
 static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
 {
    TCGOp *op;
-    int insn_idx = -1;
+    int insn_idx;

    pr_ops();
+    insn_idx = -1;
+    QSIMPLEQ_FOREACH(op, &tcg_ctx->plugin_ops, plugin_link) {
+        enum plugin_gen_from from = op->args[0];
+        enum plugin_gen_cb type = op->args[1];

-    QTAILQ_FOREACH(op, &tcg_ctx->ops, link) {
-        switch (op->opc) {
-        case INDEX_op_insn_start:
+        tcg_debug_assert(op->opc == INDEX_op_plugin_cb_start);
+        /* ENABLE_MEM_HELPER is the first callback of an instruction */
+        if (from == PLUGIN_GEN_FROM_INSN &&
+            type == PLUGIN_GEN_ENABLE_MEM_HELPER) {
            insn_idx++;
-            break;
-        case INDEX_op_plugin_cb_start:
-        {
-            enum plugin_gen_from from = op->args[0];
-            enum plugin_gen_cb type = op->args[1];
-
-            switch (from) {
-            case PLUGIN_GEN_FROM_TB:
-            {
-                g_assert(insn_idx == -1);
-
-                switch (type) {
-                case PLUGIN_GEN_CB_UDATA:
-                    plugin_gen_tb_udata(plugin_tb, op);
-                    break;
-                case PLUGIN_GEN_CB_INLINE:
-                    plugin_gen_tb_inline(plugin_tb, op);
-                    break;
-                default:
-                    g_assert_not_reached();
-                }
-                break;
-            }
-            case PLUGIN_GEN_FROM_INSN:
-            {
-                g_assert(insn_idx >= 0);
-
-                switch (type) {
-                case PLUGIN_GEN_CB_UDATA:
-                    plugin_gen_insn_udata(plugin_tb, op, insn_idx);
-                    break;
-                case PLUGIN_GEN_CB_INLINE:
-                    plugin_gen_insn_inline(plugin_tb, op, insn_idx);
-                    break;
-                case PLUGIN_GEN_ENABLE_MEM_HELPER:
-                    plugin_gen_enable_mem_helper(plugin_tb, op, insn_idx);
-                    break;
-                default:
-                    g_assert_not_reached();
-                }
-                break;
-            }
-            case PLUGIN_GEN_FROM_MEM:
-            {
-                g_assert(insn_idx >= 0);
-
-                switch (type) {
-                case PLUGIN_GEN_CB_MEM:
-                    plugin_gen_mem_regular(plugin_tb, op, insn_idx);
-                    break;
-                case PLUGIN_GEN_CB_INLINE:
-                    plugin_gen_mem_inline(plugin_tb, op, insn_idx);
-                    break;
-                default:
-                    g_assert_not_reached();
-                }
-
-                break;
-            }
-            case PLUGIN_GEN_AFTER_INSN:
-            {
-                g_assert(insn_idx >= 0);
-
-                switch (type) {
-                case PLUGIN_GEN_DISABLE_MEM_HELPER:
-                    plugin_gen_disable_mem_helper(plugin_tb, op, insn_idx);
-                    break;
-                default:
-                    g_assert_not_reached();
-                }
-                break;
-            }
-            default:
-                g_assert_not_reached();
-            }
-            break;
-        }
-        default:
-            /* plugins don't care about any other ops */
-            break;
        }
+        plugin_inject_cb(plugin_tb, op, insn_idx);
    }
    pr_ops();
 }
@ -860,6 +847,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl
    if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_mask)) {
        ret = true;

+        QSIMPLEQ_INIT(&tcg_ctx->plugin_ops);
        ptb->vaddr = tb->pc;
        ptb->vaddr2 = -1;
        get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
@ -876,8 +864,9 @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
    struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
    struct qemu_plugin_insn *pinsn;

-    pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next);
+    pinsn = qemu_plugin_tb_insn_get(ptb);
    tcg_ctx->plugin_insn = pinsn;
+    pinsn->vaddr = db->pc_next;
    plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN);

    /*
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@ -61,6 +61,8 @@ void rr_kick_vcpu_thread(CPUState *unused)
 static QEMUTimer *rr_kick_vcpu_timer;
 static CPUState *rr_current_cpu;

+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
 static inline int64_t rr_next_kick_time(void)
 {
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@ -1297,8 +1297,31 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
    invalidate_page_bitmap(p);

 #if defined(CONFIG_USER_ONLY)
-    /* translator_loop() must have made all TB pages non-writable */
-    assert(!(p->flags & PAGE_WRITE));
+    if (p->flags & PAGE_WRITE) {
+        target_ulong addr;
+        PageDesc *p2;
+        int prot;
+
+        /* force the host page as non writable (writes will have a
+           page fault + mprotect overhead) */
+        page_addr &= qemu_host_page_mask;
+        prot = 0;
+        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
+            addr += TARGET_PAGE_SIZE) {
+
+            p2 = page_find(addr >> TARGET_PAGE_BITS);
+            if (!p2) {
+                continue;
+            }
+            prot |= p2->flags;
+            p2->flags &= ~PAGE_WRITE;
+          }
+        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
+                 (prot & PAGE_BITS) & ~PAGE_WRITE);
+        if (DEBUG_TB_INVALIDATE_GATE) {
+            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
+        }
+    }
 #else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
@ -1738,7 +1761,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
    if (current_tb_modified) {
        page_collection_unlock(pages);
        /* Force execution of one insn next time.  */
-        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
        mmap_unlock();
        cpu_loop_exit_noexc(cpu);
    }
@ -1906,7 +1929,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
 #ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* Force execution of one insn next time.  */
-        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
        return true;
    }
 #endif
@ -1991,7 +2014,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
    cpu_loop_exit_noexc(cpu);
 }

-static void print_qht_statistics(struct qht_stats hst, GString *buf)
+static void print_qht_statistics(struct qht_stats hst)
 {
    uint32_t hgram_opts;
    size_t hgram_bins;
@ -2000,11 +2023,9 @@ static void print_qht_statistics(struct qht_stats hst, GString *buf)
    if (!hst.head_buckets) {
        return;
    }
-    g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
-                           "(%0.2f%% head buckets used)\n",
-                           hst.used_head_buckets, hst.head_buckets,
-                           (double)hst.used_head_buckets /
-                           hst.head_buckets * 100);
+    qemu_printf("TB hash buckets     %zu/%zu (%0.2f%% head buckets used)\n",
+                hst.used_head_buckets, hst.head_buckets,
+                (double)hst.used_head_buckets / hst.head_buckets * 100);

    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
@ -2012,9 +2033,8 @@ static void print_qht_statistics(struct qht_stats hst, GString *buf)
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
-    g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
-                           "Histogram: %s\n",
-                           qdist_avg(&hst.occupancy) * 100, hgram);
+    qemu_printf("TB hash occupancy   %0.2f%% avg chain occ. Histogram: %s\n",
+                qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
@ -2026,9 +2046,8 @@ static void print_qht_statistics(struct qht_stats hst, GString *buf)
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
-    g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
-                           "Histogram: %s\n",
-                           qdist_avg(&hst.chain), hgram);
+    qemu_printf("TB hash avg chain   %0.3f buckets. Histogram: %s\n",
+                qdist_avg(&hst.chain), hgram);
    g_free(hgram);
 }

@ -2065,7 +2084,7 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
    return false;
 }

-void dump_exec_info(GString *buf)
+void dump_exec_info(void)
 {
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
@ -2074,53 +2093,49 @@ void dump_exec_info(GString *buf)
    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
-    g_string_append_printf(buf, "Translation buffer state:\n");
+    qemu_printf("Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
-    g_string_append_printf(buf, "gen code size       %zu/%zu\n",
-                           tcg_code_size(), tcg_code_capacity());
-    g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
-    g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
-                           nb_tbs ? tst.target_size / nb_tbs : 0,
-                           tst.max_target_size);
-    g_string_append_printf(buf, "TB avg host size    %zu bytes "
-                           "(expansion ratio: %0.1f)\n",
-                           nb_tbs ? tst.host_size / nb_tbs : 0,
-                           tst.target_size ?
-                           (double)tst.host_size / tst.target_size : 0);
-    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
-                           tst.cross_page,
-                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
-    g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
-                           "(2 jumps=%zu %zu%%)\n",
-                           tst.direct_jmp_count,
-                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
-                           tst.direct_jmp2_count,
-                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
+    qemu_printf("gen code size       %zu/%zu\n",
+                tcg_code_size(), tcg_code_capacity());
+    qemu_printf("TB count            %zu\n", nb_tbs);
+    qemu_printf("TB avg target size  %zu max=%zu bytes\n",
+                nb_tbs ? tst.target_size / nb_tbs : 0,
+                tst.max_target_size);
+    qemu_printf("TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
+                nb_tbs ? tst.host_size / nb_tbs : 0,
+                tst.target_size ? (double)tst.host_size / tst.target_size : 0);
+    qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page,
+                nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
+    qemu_printf("direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
+                tst.direct_jmp_count,
+                nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
+                tst.direct_jmp2_count,
+                nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
-    print_qht_statistics(hst, buf);
+    print_qht_statistics(hst);
    qht_statistics_destroy(&hst);

-    g_string_append_printf(buf, "\nStatistics:\n");
-    g_string_append_printf(buf, "TB flush count      %u\n",
-                           qatomic_read(&tb_ctx.tb_flush_count));
-    g_string_append_printf(buf, "TB invalidate count %u\n",
-                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));
+    qemu_printf("\nStatistics:\n");
+    qemu_printf("TB flush count      %u\n",
+                qatomic_read(&tb_ctx.tb_flush_count));
+    qemu_printf("TB invalidate count %u\n",
+                qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
-    g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
-    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
-    g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
-    tcg_dump_info(buf);
+    qemu_printf("TLB full flushes    %zu\n", flush_full);
+    qemu_printf("TLB partial flushes %zu\n", flush_part);
+    qemu_printf("TLB elided flushes  %zu\n", flush_elide);
+    tcg_dump_info();
 }

-void dump_opcount_info(GString *buf)
+void dump_opcount_info(void)
 {
-    tcg_dump_op_count(buf);
+    tcg_dump_op_count();
 }

 #else /* CONFIG_USER_ONLY */
@ -2379,38 +2394,6 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
    return 0;
 }

-void page_protect(tb_page_addr_t page_addr)
-{
-    target_ulong addr;
-    PageDesc *p;
-    int prot;
-
-    p = page_find(page_addr >> TARGET_PAGE_BITS);
-    if (p && (p->flags & PAGE_WRITE)) {
-        /*
-         * Force the host page as non writable (writes will have a page fault +
-         * mprotect overhead).
-         */
-        page_addr &= qemu_host_page_mask;
-        prot = 0;
-        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
-             addr += TARGET_PAGE_SIZE) {
-
-            p = page_find(addr >> TARGET_PAGE_BITS);
-            if (!p) {
-                continue;
-            }
-            prot |= p->flags;
-            p->flags &= ~PAGE_WRITE;
-        }
-        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
-                 (prot & PAGE_BITS) & ~PAGE_WRITE);
-        if (DEBUG_TB_INVALIDATE_GATE) {
-            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
-        }
-    }
-}
-
 /* called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@ -42,15 +42,6 @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
    return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
 }

-static inline void translator_page_protect(DisasContextBase *dcbase,
-                                           target_ulong pc)
-{
-#ifdef CONFIG_USER_ONLY
-    dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
-    page_protect(pc);
-#endif
-}
-
 void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
                     CPUState *cpu, TranslationBlock *tb, int max_insns)
 {
@ -65,7 +56,6 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
    db->num_insns = 0;
    db->max_insns = max_insns;
    db->singlestep_enabled = cflags & CF_SINGLE_STEP;
-    translator_page_protect(db, db->pc_next);

    ops->init_disas_context(db, cpu);
    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
@ -147,32 +137,3 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
    }
 #endif
 }
-
-static inline void translator_maybe_page_protect(DisasContextBase *dcbase,
-                                                 target_ulong pc, size_t len)
-{
-#ifdef CONFIG_USER_ONLY
-    target_ulong end = pc + len - 1;
-
-    if (end > dcbase->page_protect_end) {
-        translator_page_protect(dcbase, end);
-    }
-#endif
-}
-
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn)             \
-    type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
-                           abi_ptr pc, bool do_swap)                    \
-    {                                                                   \
-        translator_maybe_page_protect(dcbase, pc, sizeof(type));        \
-        type ret = load_fn(env, pc);                                    \
-        if (do_swap) {                                                  \
-            ret = swap_fn(ret);                                         \
-        }                                                               \
-        plugin_insn_append(pc, &ret, sizeof(ret));                      \
-        return ret;                                                     \
-    }
-
-FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
-
-#undef GEN_TRANSLATOR_LD
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
--- a/audio/audio.c
+++ b/audio/audio.c
@ -2000,7 +2000,6 @@ void audio_create_pdos(Audiodev *dev)
        CASE(NONE, none, );
        CASE(ALSA, alsa, Alsa);
        CASE(COREAUDIO, coreaudio, Coreaudio);
-        CASE(DBUS, dbus, );
        CASE(DSOUND, dsound, );
        CASE(JACK, jack, Jack);
        CASE(OSS, oss, Oss);
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@ -31,10 +31,6 @@
 #endif
 #include "mixeng.h"

-#ifdef CONFIG_GIO
-#include <gio/gio.h>
-#endif
-
 struct audio_pcm_ops;

 struct audio_callback {
@ -144,9 +140,6 @@ struct audio_driver {
    const char *descr;
    void *(*init) (Audiodev *);
    void (*fini) (void *);
-#ifdef CONFIG_GIO
-    void (*set_dbus_server) (AudioState *s, GDBusObjectManagerServer *manager);
-#endif
    struct audio_pcm_ops *pcm_ops;
    int can_be_default;
    int max_voices_out;
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@ -327,8 +327,6 @@ AudiodevPerDirectionOptions *glue(audio_get_pdo_, TYPE)(Audiodev *dev)
    case AUDIODEV_DRIVER_COREAUDIO:
        return qapi_AudiodevCoreaudioPerDirectionOptions_base(
            dev->u.coreaudio.TYPE);
-    case AUDIODEV_DRIVER_DBUS:
-        return dev->u.dbus.TYPE;
    case AUDIODEV_DRIVER_DSOUND:
        return dev->u.dsound.TYPE;
    case AUDIODEV_DRIVER_JACK:
--- a/audio/dbusaudio.c
+++ b/audio/dbusaudio.c
@ -1,654 +0,0 @@
-/*
- * QEMU DBus audio
- *
- * Copyright (c) 2021 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/error-report.h"
-#include "qemu/host-utils.h"
-#include "qemu/module.h"
-#include "qemu/timer.h"
-#include "qemu/dbus.h"
-
-#include <gio/gunixfdlist.h>
-#include "ui/dbus-display1.h"
-
-#define AUDIO_CAP "dbus"
-#include "audio.h"
-#include "audio_int.h"
-#include "trace.h"
-
-#define DBUS_DISPLAY1_AUDIO_PATH DBUS_DISPLAY1_ROOT "/Audio"
-
-#define DBUS_AUDIO_NSAMPLES 1024 /* could be configured? */
-
-typedef struct DBusAudio {
-    GDBusObjectManagerServer *server;
-    GDBusObjectSkeleton *audio;
-    QemuDBusDisplay1Audio *iface;
-    GHashTable *out_listeners;
-    GHashTable *in_listeners;
-} DBusAudio;
-
-typedef struct DBusVoiceOut {
-    HWVoiceOut hw;
-    bool enabled;
-    RateCtl rate;
-
-    void *buf;
-    size_t buf_pos;
-    size_t buf_size;
-
-    bool has_volume;
-    Volume volume;
-} DBusVoiceOut;
-
-typedef struct DBusVoiceIn {
-    HWVoiceIn hw;
-    bool enabled;
-    RateCtl rate;
-
-    bool has_volume;
-    Volume volume;
-} DBusVoiceIn;
-
-static void *dbus_get_buffer_out(HWVoiceOut *hw, size_t *size)
-{
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-
-    if (!vo->buf) {
-        vo->buf_size = hw->samples * hw->info.bytes_per_frame;
-        vo->buf = g_malloc(vo->buf_size);
-        vo->buf_pos = 0;
-    }
-
-    *size = MIN(vo->buf_size - vo->buf_pos, *size);
-    *size = audio_rate_get_bytes(&hw->info, &vo->rate, *size);
-
-    return vo->buf + vo->buf_pos;
-
-}
-
-static size_t dbus_put_buffer_out(HWVoiceOut *hw, void *buf, size_t size)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioOutListener *listener = NULL;
-    g_autoptr(GBytes) bytes = NULL;
-    g_autoptr(GVariant) v_data = NULL;
-
-    assert(buf == vo->buf + vo->buf_pos && vo->buf_pos + size <= vo->buf_size);
-    vo->buf_pos += size;
-
-    trace_dbus_audio_put_buffer_out(size);
-
-    if (vo->buf_pos < vo->buf_size) {
-        return size;
-    }
-
-    bytes = g_bytes_new_take(g_steal_pointer(&vo->buf), vo->buf_size);
-    v_data = g_variant_new_from_bytes(G_VARIANT_TYPE("ay"), bytes, TRUE);
-    g_variant_ref_sink(v_data);
-
-    g_hash_table_iter_init(&iter, da->out_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        qemu_dbus_display1_audio_out_listener_call_write(
-            listener,
-            (uintptr_t)hw,
-            v_data,
-            G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-    }
-
-    return size;
-}
-
-#ifdef HOST_WORDS_BIGENDIAN
-#define AUDIO_HOST_BE TRUE
-#else
-#define AUDIO_HOST_BE FALSE
-#endif
-
-static void
-dbus_init_out_listener(QemuDBusDisplay1AudioOutListener *listener,
-                       HWVoiceOut *hw)
-{
-    qemu_dbus_display1_audio_out_listener_call_init(
-        listener,
-        (uintptr_t)hw,
-        hw->info.bits,
-        hw->info.is_signed,
-        hw->info.is_float,
-        hw->info.freq,
-        hw->info.nchannels,
-        hw->info.bytes_per_frame,
-        hw->info.bytes_per_second,
-        hw->info.swap_endianness ? !AUDIO_HOST_BE : AUDIO_HOST_BE,
-        G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-}
-
-static int
-dbus_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioOutListener *listener = NULL;
-
-    audio_pcm_init_info(&hw->info, as);
-    hw->samples = DBUS_AUDIO_NSAMPLES;
-    audio_rate_start(&vo->rate);
-
-    g_hash_table_iter_init(&iter, da->out_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        dbus_init_out_listener(listener, hw);
-    }
-    return 0;
-}
-
-static void
-dbus_fini_out(HWVoiceOut *hw)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioOutListener *listener = NULL;
-
-    g_hash_table_iter_init(&iter, da->out_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        qemu_dbus_display1_audio_out_listener_call_fini(
-            listener,
-            (uintptr_t)hw,
-            G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-    }
-
-    g_clear_pointer(&vo->buf, g_free);
-}
-
-static void
-dbus_enable_out(HWVoiceOut *hw, bool enable)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioOutListener *listener = NULL;
-
-    vo->enabled = enable;
-    if (enable) {
-        audio_rate_start(&vo->rate);
-    }
-
-    g_hash_table_iter_init(&iter, da->out_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        qemu_dbus_display1_audio_out_listener_call_set_enabled(
-            listener, (uintptr_t)hw, enable,
-            G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-    }
-}
-
-static void
-dbus_volume_out_listener(HWVoiceOut *hw,
-                         QemuDBusDisplay1AudioOutListener *listener)
-{
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-    Volume *vol = &vo->volume;
-    g_autoptr(GBytes) bytes = NULL;
-    GVariant *v_vol = NULL;
-
-    if (!vo->has_volume) {
-        return;
-    }
-
-    assert(vol->channels < sizeof(vol->vol));
-    bytes = g_bytes_new(vol->vol, vol->channels);
-    v_vol = g_variant_new_from_bytes(G_VARIANT_TYPE("ay"), bytes, TRUE);
-    qemu_dbus_display1_audio_out_listener_call_set_volume(
-        listener, (uintptr_t)hw, vol->mute, v_vol,
-        G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-}
-
-static void
-dbus_volume_out(HWVoiceOut *hw, Volume *vol)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioOutListener *listener = NULL;
-
-    vo->has_volume = true;
-    vo->volume = *vol;
-
-    g_hash_table_iter_init(&iter, da->out_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        dbus_volume_out_listener(hw, listener);
-    }
-}
-
-static void
-dbus_init_in_listener(QemuDBusDisplay1AudioInListener *listener, HWVoiceIn *hw)
-{
-    qemu_dbus_display1_audio_in_listener_call_init(
-        listener,
-        (uintptr_t)hw,
-        hw->info.bits,
-        hw->info.is_signed,
-        hw->info.is_float,
-        hw->info.freq,
-        hw->info.nchannels,
-        hw->info.bytes_per_frame,
-        hw->info.bytes_per_second,
-        hw->info.swap_endianness ? !AUDIO_HOST_BE : AUDIO_HOST_BE,
-        G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-}
-
-static int
-dbus_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceIn *vo = container_of(hw, DBusVoiceIn, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioInListener *listener = NULL;
-
-    audio_pcm_init_info(&hw->info, as);
-    hw->samples = DBUS_AUDIO_NSAMPLES;
-    audio_rate_start(&vo->rate);
-
-    g_hash_table_iter_init(&iter, da->in_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        dbus_init_in_listener(listener, hw);
-    }
-    return 0;
-}
-
-static void
-dbus_fini_in(HWVoiceIn *hw)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioInListener *listener = NULL;
-
-    g_hash_table_iter_init(&iter, da->in_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        qemu_dbus_display1_audio_in_listener_call_fini(
-            listener,
-            (uintptr_t)hw,
-            G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-    }
-}
-
-static void
-dbus_volume_in_listener(HWVoiceIn *hw,
-                         QemuDBusDisplay1AudioInListener *listener)
-{
-    DBusVoiceIn *vo = container_of(hw, DBusVoiceIn, hw);
-    Volume *vol = &vo->volume;
-    g_autoptr(GBytes) bytes = NULL;
-    GVariant *v_vol = NULL;
-
-    if (!vo->has_volume) {
-        return;
-    }
-
-    assert(vol->channels < sizeof(vol->vol));
-    bytes = g_bytes_new(vol->vol, vol->channels);
-    v_vol = g_variant_new_from_bytes(G_VARIANT_TYPE("ay"), bytes, TRUE);
-    qemu_dbus_display1_audio_in_listener_call_set_volume(
-        listener, (uintptr_t)hw, vol->mute, v_vol,
-        G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-}
-
-static void
-dbus_volume_in(HWVoiceIn *hw, Volume *vol)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceIn *vo = container_of(hw, DBusVoiceIn, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioInListener *listener = NULL;
-
-    vo->has_volume = true;
-    vo->volume = *vol;
-
-    g_hash_table_iter_init(&iter, da->in_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        dbus_volume_in_listener(hw, listener);
-    }
-}
-
-static size_t
-dbus_read(HWVoiceIn *hw, void *buf, size_t size)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    /* DBusVoiceIn *vo = container_of(hw, DBusVoiceIn, hw); */
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioInListener *listener = NULL;
-
-    trace_dbus_audio_read(size);
-
-    /* size = audio_rate_get_bytes(&hw->info, &vo->rate, size); */
-
-    g_hash_table_iter_init(&iter, da->in_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        g_autoptr(GVariant) v_data = NULL;
-        const char *data;
-        gsize n = 0;
-
-        if (qemu_dbus_display1_audio_in_listener_call_read_sync(
-                listener,
-                (uintptr_t)hw,
-                size,
-                G_DBUS_CALL_FLAGS_NONE, -1,
-                &v_data, NULL, NULL)) {
-            data = g_variant_get_fixed_array(v_data, &n, 1);
-            g_warn_if_fail(n <= size);
-            size = MIN(n, size);
-            memcpy(buf, data, size);
-            break;
-        }
-    }
-
-    return size;
-}
-
-static void
-dbus_enable_in(HWVoiceIn *hw, bool enable)
-{
-    DBusAudio *da = (DBusAudio *)hw->s->drv_opaque;
-    DBusVoiceIn *vo = container_of(hw, DBusVoiceIn, hw);
-    GHashTableIter iter;
-    QemuDBusDisplay1AudioInListener *listener = NULL;
-
-    vo->enabled = enable;
-    if (enable) {
-        audio_rate_start(&vo->rate);
-    }
-
-    g_hash_table_iter_init(&iter, da->in_listeners);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&listener)) {
-        qemu_dbus_display1_audio_in_listener_call_set_enabled(
-            listener, (uintptr_t)hw, enable,
-            G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-    }
-}
-
-static void *
-dbus_audio_init(Audiodev *dev)
-{
-    DBusAudio *da = g_new0(DBusAudio, 1);
-
-    da->out_listeners = g_hash_table_new_full(g_str_hash, g_str_equal,
-                                                g_free, g_object_unref);
-    da->in_listeners = g_hash_table_new_full(g_str_hash, g_str_equal,
-                                               g_free, g_object_unref);
-    return da;
-}
-
-static void
-dbus_audio_fini(void *opaque)
-{
-    DBusAudio *da = opaque;
-
-    if (da->server) {
-        g_dbus_object_manager_server_unexport(da->server,
-                                              DBUS_DISPLAY1_AUDIO_PATH);
-    }
-    g_clear_object(&da->audio);
-    g_clear_object(&da->iface);
-    g_clear_pointer(&da->in_listeners, g_hash_table_unref);
-    g_clear_pointer(&da->out_listeners, g_hash_table_unref);
-    g_clear_object(&da->server);
-    g_free(da);
-}
-
-static void
-listener_out_vanished_cb(GDBusConnection *connection,
-                         gboolean remote_peer_vanished,
-                         GError *error,
-                         DBusAudio *da)
-{
-    char *name = g_object_get_data(G_OBJECT(connection), "name");
-
-    g_hash_table_remove(da->out_listeners, name);
-}
-
-static void
-listener_in_vanished_cb(GDBusConnection *connection,
-                        gboolean remote_peer_vanished,
-                        GError *error,
-                        DBusAudio *da)
-{
-    char *name = g_object_get_data(G_OBJECT(connection), "name");
-
-    g_hash_table_remove(da->in_listeners, name);
-}
-
-static gboolean
-dbus_audio_register_listener(AudioState *s,
-                             GDBusMethodInvocation *invocation,
-                             GUnixFDList *fd_list,
-                             GVariant *arg_listener,
-                             bool out)
-{
-    DBusAudio *da = s->drv_opaque;
-    const char *sender = g_dbus_method_invocation_get_sender(invocation);
-    g_autoptr(GDBusConnection) listener_conn = NULL;
-    g_autoptr(GError) err = NULL;
-    g_autoptr(GSocket) socket = NULL;
-    g_autoptr(GSocketConnection) socket_conn = NULL;
-    g_autofree char *guid = g_dbus_generate_guid();
-    GHashTable *listeners = out ? da->out_listeners : da->in_listeners;
-    GObject *listener;
-    int fd;
-
-    trace_dbus_audio_register(sender, out ? "out" : "in");
-
-    if (g_hash_table_contains(listeners, sender)) {
-        g_dbus_method_invocation_return_error(invocation,
-                                              DBUS_DISPLAY_ERROR,
-                                              DBUS_DISPLAY_ERROR_INVALID,
-                                              "`%s` is already registered!",
-                                              sender);
-        return DBUS_METHOD_INVOCATION_HANDLED;
-    }
-
-    fd = g_unix_fd_list_get(fd_list, g_variant_get_handle(arg_listener), &err);
-    if (err) {
-        g_dbus_method_invocation_return_error(invocation,
-                                              DBUS_DISPLAY_ERROR,
-                                              DBUS_DISPLAY_ERROR_FAILED,
-                                              "Couldn't get peer fd: %s",
-                                              err->message);
-        return DBUS_METHOD_INVOCATION_HANDLED;
-    }
-
-    socket = g_socket_new_from_fd(fd, &err);
-    if (err) {
-        g_dbus_method_invocation_return_error(invocation,
-                                              DBUS_DISPLAY_ERROR,
-                                              DBUS_DISPLAY_ERROR_FAILED,
-                                              "Couldn't make a socket: %s",
-                                              err->message);
-        return DBUS_METHOD_INVOCATION_HANDLED;
-    }
-    socket_conn = g_socket_connection_factory_create_connection(socket);
-    if (out) {
-        qemu_dbus_display1_audio_complete_register_out_listener(
-            da->iface, invocation, NULL);
-    } else {
-        qemu_dbus_display1_audio_complete_register_in_listener(
-            da->iface, invocation, NULL);
-    }
-
-    listener_conn =
-        g_dbus_connection_new_sync(
-            G_IO_STREAM(socket_conn),
-            guid,
-            G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_SERVER,
-            NULL, NULL, &err);
-    if (err) {
-        error_report("Failed to setup peer connection: %s", err->message);
-        return DBUS_METHOD_INVOCATION_HANDLED;
-    }
-
-    listener = out ?
-        G_OBJECT(qemu_dbus_display1_audio_out_listener_proxy_new_sync(
-            listener_conn,
-            G_DBUS_PROXY_FLAGS_DO_NOT_AUTO_START,
-            NULL,
-            "/org/qemu/Display1/AudioOutListener",
-            NULL,
-            &err)) :
-        G_OBJECT(qemu_dbus_display1_audio_in_listener_proxy_new_sync(
-            listener_conn,
-            G_DBUS_PROXY_FLAGS_DO_NOT_AUTO_START,
-            NULL,
-            "/org/qemu/Display1/AudioInListener",
-            NULL,
-            &err));
-    if (!listener) {
-        error_report("Failed to setup proxy: %s", err->message);
-        return DBUS_METHOD_INVOCATION_HANDLED;
-    }
-
-    if (out) {
-        HWVoiceOut *hw;
-
-        QLIST_FOREACH(hw, &s->hw_head_out, entries) {
-            DBusVoiceOut *vo = container_of(hw, DBusVoiceOut, hw);
-            QemuDBusDisplay1AudioOutListener *l =
-                QEMU_DBUS_DISPLAY1_AUDIO_OUT_LISTENER(listener);
-
-            dbus_init_out_listener(l, hw);
-            qemu_dbus_display1_audio_out_listener_call_set_enabled(
-                l, (uintptr_t)hw, vo->enabled,
-                G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-        }
-    } else {
-        HWVoiceIn *hw;
-
-        QLIST_FOREACH(hw, &s->hw_head_in, entries) {
-            DBusVoiceIn *vo = container_of(hw, DBusVoiceIn, hw);
-            QemuDBusDisplay1AudioInListener *l =
-                QEMU_DBUS_DISPLAY1_AUDIO_IN_LISTENER(listener);
-
-            dbus_init_in_listener(
-                QEMU_DBUS_DISPLAY1_AUDIO_IN_LISTENER(listener), hw);
-            qemu_dbus_display1_audio_in_listener_call_set_enabled(
-                l, (uintptr_t)hw, vo->enabled,
-                G_DBUS_CALL_FLAGS_NONE, -1, NULL, NULL, NULL);
-        }
-    }
-
-    g_object_set_data_full(G_OBJECT(listener_conn), "name",
-                           g_strdup(sender), g_free);
-    g_hash_table_insert(listeners, g_strdup(sender), listener);
-    g_object_connect(listener_conn,
-                     "signal::closed",
-                     out ? listener_out_vanished_cb : listener_in_vanished_cb,
-                     da,
-                     NULL);
-
-    return DBUS_METHOD_INVOCATION_HANDLED;
-}
-
-static gboolean
-dbus_audio_register_out_listener(AudioState *s,
-                                 GDBusMethodInvocation *invocation,
-                                 GUnixFDList *fd_list,
-                                 GVariant *arg_listener)
-{
-    return dbus_audio_register_listener(s, invocation,
-                                        fd_list, arg_listener, true);
-
-}
-
-static gboolean
-dbus_audio_register_in_listener(AudioState *s,
-                                GDBusMethodInvocation *invocation,
-                                GUnixFDList *fd_list,
-                                GVariant *arg_listener)
-{
-    return dbus_audio_register_listener(s, invocation,
-                                        fd_list, arg_listener, false);
-}
-
-static void
-dbus_audio_set_server(AudioState *s, GDBusObjectManagerServer *server)
-{
-    DBusAudio *da = s->drv_opaque;
-
-    g_assert(da);
-    g_assert(!da->server);
-
-    da->server = g_object_ref(server);
-
-    da->audio = g_dbus_object_skeleton_new(DBUS_DISPLAY1_AUDIO_PATH);
-    da->iface = qemu_dbus_display1_audio_skeleton_new();
-    g_object_connect(da->iface,
-                     "swapped-signal::handle-register-in-listener",
-                     dbus_audio_register_in_listener, s,
-                     "swapped-signal::handle-register-out-listener",
-                     dbus_audio_register_out_listener, s,
-                     NULL);
-
-    g_dbus_object_skeleton_add_interface(G_DBUS_OBJECT_SKELETON(da->audio),
-                                         G_DBUS_INTERFACE_SKELETON(da->iface));
-    g_dbus_object_manager_server_export(da->server, da->audio);
-}
-
-static struct audio_pcm_ops dbus_pcm_ops = {
-    .init_out = dbus_init_out,
-    .fini_out = dbus_fini_out,
-    .write    = audio_generic_write,
-    .get_buffer_out = dbus_get_buffer_out,
-    .put_buffer_out = dbus_put_buffer_out,
-    .enable_out = dbus_enable_out,
-    .volume_out = dbus_volume_out,
-
-    .init_in  = dbus_init_in,
-    .fini_in  = dbus_fini_in,
-    .read     = dbus_read,
-    .run_buffer_in = audio_generic_run_buffer_in,
-    .enable_in = dbus_enable_in,
-    .volume_in = dbus_volume_in,
-};
-
-static struct audio_driver dbus_audio_driver = {
-    .name            = "dbus",
-    .descr           = "Timer based audio exposed with DBus interface",
-    .init            = dbus_audio_init,
-    .fini            = dbus_audio_fini,
-    .set_dbus_server = dbus_audio_set_server,
-    .pcm_ops         = &dbus_pcm_ops,
-    .can_be_default  = 1,
-    .max_voices_out  = INT_MAX,
-    .max_voices_in   = INT_MAX,
-    .voice_size_out  = sizeof(DBusVoiceOut),
-    .voice_size_in   = sizeof(DBusVoiceIn)
-};
-
-static void register_audio_dbus(void)
-{
-    audio_driver_register(&dbus_audio_driver);
-}
-type_init(register_audio_dbus);
-
-module_dep("ui-dbus")
--- a/audio/dsoundaudio.c
+++ b/audio/dsoundaudio.c
@ -536,12 +536,13 @@ static void *dsound_get_buffer_in(HWVoiceIn *hw, size_t *size)
    DSoundVoiceIn *ds = (DSoundVoiceIn *) hw;
    LPDIRECTSOUNDCAPTUREBUFFER dscb = ds->dsound_capture_buffer;
    HRESULT hr;
-    DWORD rpos, act_size;
+    DWORD cpos, rpos, act_size;
    size_t req_size;
    int err;
    void *ret;

-    hr = IDirectSoundCaptureBuffer_GetCurrentPosition(dscb, NULL, &rpos);
+    hr = IDirectSoundCaptureBuffer_GetCurrentPosition(
+        dscb, &cpos, ds->first_time ? &rpos : NULL);
    if (FAILED(hr)) {
        dsound_logerr(hr, "Could not get capture buffer position\n");
        *size = 0;
@ -553,7 +554,7 @@ static void *dsound_get_buffer_in(HWVoiceIn *hw, size_t *size)
        ds->first_time = false;
    }

-    req_size = audio_ring_dist(rpos, hw->pos_emul, hw->size_emul);
+    req_size = audio_ring_dist(cpos, hw->pos_emul, hw->size_emul);
    req_size = MIN(*size, MIN(req_size, hw->size_emul - hw->pos_emul));

    if (req_size == 0) {
--- a/audio/jackaudio.c
+++ b/audio/jackaudio.c
@ -622,7 +622,6 @@ static void qjack_enable_in(HWVoiceIn *hw, bool enable)
    ji->c.enabled = enable;
 }

-#if !defined(WIN32) && defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
 static int qjack_thread_creator(jack_native_thread_t *thread,
    const pthread_attr_t *attr, void *(*function)(void *), void *arg)
 {
@ -636,7 +635,6 @@ static int qjack_thread_creator(jack_native_thread_t *thread,

    return ret;
 }
-#endif

 static void *qjack_init(Audiodev *dev)
 {
@ -689,9 +687,7 @@ static void register_audio_jack(void)
 {
    qemu_mutex_init(&qjack_shutdown_lock);
    audio_driver_register(&jack_driver);
-#if !defined(WIN32) && defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
    jack_set_thread_creator(qjack_thread_creator);
-#endif
    jack_set_error_function(qjack_error);
    jack_set_info_function(qjack_info);
 }
--- a/audio/meson.build
+++ b/audio/meson.build
@ -7,29 +7,24 @@ softmmu_ss.add(files(
  'wavcapture.c',
 ))

-softmmu_ss.add(when: coreaudio, if_true: files('coreaudio.c'))
-softmmu_ss.add(when: dsound, if_true: files('dsoundaudio.c', 'audio_win_int.c'))
+softmmu_ss.add(when: [coreaudio, 'CONFIG_AUDIO_COREAUDIO'], if_true: files('coreaudio.c'))
+softmmu_ss.add(when: [dsound, 'CONFIG_AUDIO_DSOUND'], if_true: files('dsoundaudio.c'))
+softmmu_ss.add(when: ['CONFIG_AUDIO_WIN_INT'], if_true: files('audio_win_int.c'))

 audio_modules = {}
 foreach m : [
-  ['alsa', alsa, files('alsaaudio.c')],
-  ['oss', oss, files('ossaudio.c')],
-  ['pa', pulse, files('paaudio.c')],
-  ['sdl', sdl, files('sdlaudio.c')],
-  ['jack', jack, files('jackaudio.c')],
-  ['spice', spice, files('spiceaudio.c')]
+  ['CONFIG_AUDIO_ALSA', 'alsa', alsa, 'alsaaudio.c'],
+  ['CONFIG_AUDIO_OSS', 'oss', oss, 'ossaudio.c'],
+  ['CONFIG_AUDIO_PA', 'pa', pulse, 'paaudio.c'],
+  ['CONFIG_AUDIO_SDL', 'sdl', sdl, 'sdlaudio.c'],
+  ['CONFIG_AUDIO_JACK', 'jack', jack, 'jackaudio.c'],
+  ['CONFIG_SPICE', 'spice', spice, 'spiceaudio.c']
 ]
-  if m[1].found()
+  if config_host.has_key(m[0])
    module_ss = ss.source_set()
-    module_ss.add(m[1], m[2])
-    audio_modules += {m[0] : module_ss}
+    module_ss.add(when: m[2], if_true: files(m[3]))
+    audio_modules += {m[1] : module_ss}
  endif
 endforeach

-if dbus_display
-    module_ss = ss.source_set()
-    module_ss.add(when: gio, if_true: files('dbusaudio.c'))
-    audio_modules += {'dbus': module_ss}
-endif
-
 modules += {'audio': audio_modules}
--- a/audio/trace-events
+++ b/audio/trace-events
@ -13,11 +13,6 @@ alsa_resume_out(void) "Resuming suspended output stream"
 # ossaudio.c
 oss_version(int version) "OSS version = 0x%x"

-# dbusaudio.c
-dbus_audio_register(const char *s, const char *dir) "sender = %s, dir = %s"
-dbus_audio_put_buffer_out(size_t len) "len = %zu"
-dbus_audio_read(size_t len) "len = %zu"
-
 # audio.c
 audio_timer_start(int interval) "interval %d ms"
 audio_timer_stop(void) ""
--- a/backends/dbus-vmstate1.xml
+++ b/backends/dbus-vmstate1.xml
@ -1,52 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<node name="/" xmlns:doc="http://www.freedesktop.org/dbus/1.0/doc.dtd">
-  <!--
-      org.qemu.VMState1:
-
-      This interface must be implemented at the object path
-      ``/org/qemu/VMState1`` to support helper migration.
-  -->
-  <interface name="org.qemu.VMState1">
-
-    <!--
-        Id:
-
-        A string that identifies the helper uniquely. (maximum 256 bytes
-        including terminating NUL byte)
-
-        .. note::
-
-           The VMState helper ID namespace is its own namespace. In particular,
-           it is not related to QEMU "id" used in -object/-device objects.
-    -->
-    <property name="Id" type="s" access="read"/>
-
-    <!--
-        Load:
-        @data: data to restore the state.
-
-        The method called on destination with the state to restore.
-
-        The helper may be initially started in a waiting state (with an
-        ``-incoming`` argument for example), and it may resume on success.
-
-        An error may be returned to the caller.
-    -->
-    <method name="Load">
-      <arg type="ay" name="data" direction="in"/>
-    </method>
-
-    <!--
-        Save:
-        @data: state data to save for later resume.
-
-        The method called on the source to get the current state to be
-        migrated. The helper should continue to run normally.
-
-        An error may be returned to the caller.
-    -->
-    <method name="Save">
-      <arg type="ay" name="data" direction="out"/>
-    </method>
-  </interface>
-</node>
--- a/backends/hostmem-epc.c
+++ b/backends/hostmem-epc.c
@ -1,82 +0,0 @@
-/*
- * QEMU host SGX EPC memory backend
- *
- * Copyright (C) 2019 Intel Corporation
- *
- * Authors:
- *   Sean Christopherson <sean.j.christopherson@intel.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#include <sys/ioctl.h>
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qom/object_interfaces.h"
-#include "qapi/error.h"
-#include "sysemu/hostmem.h"
-#include "hw/i386/hostmem-epc.h"
-
-static void
-sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
-{
-    uint32_t ram_flags;
-    char *name;
-    int fd;
-
-    if (!backend->size) {
-        error_setg(errp, "can't create backend with size 0");
-        return;
-    }
-
-    fd = qemu_open_old("/dev/sgx_vepc", O_RDWR);
-    if (fd < 0) {
-        error_setg_errno(errp, errno,
-                         "failed to open /dev/sgx_vepc to alloc SGX EPC");
-        return;
-    }
-
-    name = object_get_canonical_path(OBJECT(backend));
-    ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
-    memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
-                                   name, backend->size, ram_flags,
-                                   fd, 0, errp);
-    g_free(name);
-}
-
-static void sgx_epc_backend_instance_init(Object *obj)
-{
-    HostMemoryBackend *m = MEMORY_BACKEND(obj);
-
-    m->share = true;
-    m->merge = false;
-    m->dump = false;
-}
-
-static void sgx_epc_backend_class_init(ObjectClass *oc, void *data)
-{
-    HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
-
-    bc->alloc = sgx_epc_backend_memory_alloc;
-}
-
-static const TypeInfo sgx_epc_backed_info = {
-    .name = TYPE_MEMORY_BACKEND_EPC,
-    .parent = TYPE_MEMORY_BACKEND,
-    .instance_init = sgx_epc_backend_instance_init,
-    .class_init = sgx_epc_backend_class_init,
-    .instance_size = sizeof(HostMemoryBackendEpc),
-};
-
-static void register_types(void)
-{
-    int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR);
-    if (fd >= 0) {
-        close(fd);
-
-        type_register_static(&sgx_epc_backed_info);
-    }
-}
-
-type_init(register_types);
--- a/backends/meson.build
+++ b/backends/meson.build
@ -16,6 +16,5 @@ softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vho
 softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
 softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c'))
 softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio])
-softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c'))

 subdir('tpm')
--- a/backends/tpm/tpm_emulator.c
+++ b/backends/tpm/tpm_emulator.c
@ -492,7 +492,8 @@ static int tpm_emulator_block_migration(TPMEmulator *tpm_emu)
        error_setg(&tpm_emu->migration_blocker,
                   "Migration disabled: TPM emulator does not support "
                   "migration");
-        if (migrate_add_blocker(tpm_emu->migration_blocker, &err) < 0) {
+        migrate_add_blocker(tpm_emu->migration_blocker, &err);
+        if (err) {
            error_report_err(err);
            error_free(tpm_emu->migration_blocker);
            tpm_emu->migration_blocker = NULL;
@ -623,7 +624,7 @@ static TpmTypeOptions *tpm_emulator_get_tpm_options(TPMBackend *tb)
    TPMEmulator *tpm_emu = TPM_EMULATOR(tb);
    TpmTypeOptions *options = g_new0(TpmTypeOptions, 1);

-    options->type = TPM_TYPE_EMULATOR;
+    options->type = TPM_TYPE_OPTIONS_KIND_EMULATOR;
    options->u.emulator.data = QAPI_CLONE(TPMEmulatorOptions, tpm_emu->options);

    return options;
--- a/backends/tpm/tpm_passthrough.c
+++ b/backends/tpm/tpm_passthrough.c
@ -321,7 +321,7 @@ static TpmTypeOptions *tpm_passthrough_get_tpm_options(TPMBackend *tb)
 {
    TpmTypeOptions *options = g_new0(TpmTypeOptions, 1);

-    options->type = TPM_TYPE_PASSTHROUGH;
+    options->type = TPM_TYPE_OPTIONS_KIND_PASSTHROUGH;
    options->u.passthrough.data = QAPI_CLONE(TPMPassthroughOptions,
                                             TPM_PASSTHROUGH(tb)->options);

--- a/block.c
+++ b/block.c
@ -49,8 +49,6 @@
 #include "qemu/timer.h"
 #include "qemu/cutils.h"
 #include "qemu/id.h"
-#include "qemu/range.h"
-#include "qemu/rcu.h"
 #include "block/coroutines.h"

 #ifdef CONFIG_BSD
@ -84,13 +82,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           BdrvChildRole child_role,
                                           Error **errp);

-static bool bdrv_recurse_has_child(BlockDriverState *bs,
-                                   BlockDriverState *child);
-
-static void bdrv_child_free(BdrvChild *child);
-static void bdrv_replace_child_noperm(BdrvChild **child,
-                                      BlockDriverState *new_bs,
-                                      bool free_empty_child);
+static void bdrv_replace_child_noperm(BdrvChild *child,
+                                      BlockDriverState *new_bs);
 static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                              BdrvChild *child,
                                              Transaction *tran);
@ -103,8 +96,6 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
 static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 static void bdrv_reopen_abort(BDRVReopenState *reopen_state);

-static bool bdrv_backing_overridden(BlockDriverState *bs);
-
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;

@ -410,9 +401,6 @@ BlockDriverState *bdrv_new(void)

    qemu_co_queue_init(&bs->flush_queue);

-    qemu_co_mutex_init(&bs->bsc_modify_lock);
-    bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
-
    for (i = 0; i < bdrv_drain_all_count; i++) {
        bdrv_drained_begin(bs);
    }
@ -1391,8 +1379,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 {
    BlockDriverState *bs = child->opaque;

-    QLIST_INSERT_HEAD(&bs->children, child, next);
-
    if (child->role & BDRV_CHILD_COW) {
        bdrv_backing_attach(child);
    }
@ -1409,8 +1395,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
    }

    bdrv_unapply_subtree_drain(child, bs);
-
-    QLIST_REMOVE(child, next);
 }

 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
@ -1615,26 +1599,16 @@ open_failed:
    return ret;
 }

-/*
- * Create and open a block node.
- *
- * @options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict belongs to the block layer
- * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use qobject_ref() before calling bdrv_open.
- */
-BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
-                                            const char *node_name,
-                                            QDict *options, int flags,
-                                            Error **errp)
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+                                       int flags, Error **errp)
 {
    BlockDriverState *bs;
    int ret;

    bs = bdrv_new();
    bs->open_flags = flags;
-    bs->options = options ?: qdict_new();
-    bs->explicit_options = qdict_clone_shallow(bs->options);
+    bs->explicit_options = qdict_new();
+    bs->options = qdict_new();
    bs->opaque = NULL;

    update_options_from_flags(bs->options, flags);
@ -1652,13 +1626,6 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
    return bs;
 }

-/* Create and open a block node. */
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
-                                       int flags, Error **errp)
-{
-    return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
-}
-
 QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
@ -2258,18 +2225,13 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,

 typedef struct BdrvReplaceChildState {
    BdrvChild *child;
-    BdrvChild **childp;
    BlockDriverState *old_bs;
-    bool free_empty_child;
 } BdrvReplaceChildState;

 static void bdrv_replace_child_commit(void *opaque)
 {
    BdrvReplaceChildState *s = opaque;

-    if (s->free_empty_child && !s->child->bs) {
-        bdrv_child_free(s->child);
-    }
    bdrv_unref(s->old_bs);
 }

@ -2278,34 +2240,8 @@ static void bdrv_replace_child_abort(void *opaque)
    BdrvReplaceChildState *s = opaque;
    BlockDriverState *new_bs = s->child->bs;

-    /*
-     * old_bs reference is transparently moved from @s to s->child.
-     *
-     * Pass &s->child here instead of s->childp, because:
-     * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
-     *     modify the BdrvChild * pointer we indirectly pass to it, i.e. it
-     *     will not modify s->child.  From that perspective, it does not matter
-     *     whether we pass s->childp or &s->child.
-     * (2) If new_bs is not NULL, s->childp will be NULL.  We then cannot use
-     *     it here.
-     * (3) If new_bs is NULL, *s->childp will have been NULLed by
-     *     bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
-     *     must not pass a NULL *s->childp here.
-     *
-     * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
-     * any case, there is no reason to pass it anyway.
-     */
-    bdrv_replace_child_noperm(&s->child, s->old_bs, true);
-    /*
-     * The child was pre-existing, so s->old_bs must be non-NULL, and
-     * s->child thus must not have been freed
-     */
-    assert(s->child != NULL);
-    if (!new_bs) {
-        /* As described above, *s->childp was cleared, so restore it */
-        assert(s->childp != NULL);
-        *s->childp = s->child;
-    }
+    /* old_bs reference is transparently moved from @s to @s->child */
+    bdrv_replace_child_noperm(s->child, s->old_bs);
    bdrv_unref(new_bs);
 }

@ -2321,46 +2257,22 @@ static TransactionActionDrv bdrv_replace_child_drv = {
 * Note: real unref of old_bs is done only on commit.
 *
 * The function doesn't update permissions, caller is responsible for this.
- *
- * (*childp)->bs must not be NULL.
- *
- * Note that if new_bs == NULL, @childp is stored in a state object attached
- * to @tran, so that the old child can be reinstated in the abort handler.
- * Therefore, if @new_bs can be NULL, @childp must stay valid until the
- * transaction is committed or aborted.
- *
- * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
- * freed (on commit).  @free_empty_child should only be false if the
- * caller will free the BDrvChild themselves (which may be important
- * if this is in turn called in another transactional context).
 */
-static void bdrv_replace_child_tran(BdrvChild **childp,
-                                    BlockDriverState *new_bs,
-                                    Transaction *tran,
-                                    bool free_empty_child)
+static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
+                                    Transaction *tran)
 {
    BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
    *s = (BdrvReplaceChildState) {
-        .child = *childp,
-        .childp = new_bs == NULL ? childp : NULL,
-        .old_bs = (*childp)->bs,
-        .free_empty_child = free_empty_child,
+        .child = child,
+        .old_bs = child->bs,
    };
    tran_add(tran, &bdrv_replace_child_drv, s);

-    /* The abort handler relies on this */
-    assert(s->old_bs != NULL);
-
    if (new_bs) {
        bdrv_ref(new_bs);
    }
-    /*
-     * Pass free_empty_child=false, we will free the child (if
-     * necessary) in bdrv_replace_child_commit() (if our
-     * @free_empty_child parameter was true).
-     */
-    bdrv_replace_child_noperm(childp, new_bs, false);
-    /* old_bs reference is transparently moved from *childp to @s */
+    bdrv_replace_child_noperm(child, new_bs);
+    /* old_bs reference is transparently moved from @child to @s */
 }

 /*
@ -2485,6 +2397,7 @@ char *bdrv_perm_names(uint64_t perm)
        { BLK_PERM_WRITE,           "write" },
        { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
        { BLK_PERM_RESIZE,          "resize" },
+        { BLK_PERM_GRAPH_MOD,       "change children" },
        { 0, NULL }
    };

@ -2600,7 +2513,8 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
        shared = 0;
    }

-    shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
+    shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
+              BLK_PERM_WRITE_UNCHANGED;

    if (bs->open_flags & BDRV_O_INACTIVE) {
        shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
@ -2718,6 +2632,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
        [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
        [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
        [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
+        [BLOCK_PERMISSION_GRAPH_MOD]        = BLK_PERM_GRAPH_MOD,
    };

    QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
@ -2728,30 +2643,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
    return permissions[qapi_perm];
 }

-/**
- * Replace (*childp)->bs by @new_bs.
- *
- * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents
- * generally cannot handle a BdrvChild with .bs == NULL, so clearing
- * BdrvChild.bs should generally immediately be followed by the
- * BdrvChild pointer being cleared as well.
- *
- * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
- * freed.  @free_empty_child should only be false if the caller will
- * free the BdrvChild themselves (this may be important in a
- * transactional context, where it may only be freed on commit).
- */
-static void bdrv_replace_child_noperm(BdrvChild **childp,
-                                      BlockDriverState *new_bs,
-                                      bool free_empty_child)
+static void bdrv_replace_child_noperm(BdrvChild *child,
+                                      BlockDriverState *new_bs)
 {
-    BdrvChild *child = *childp;
    BlockDriverState *old_bs = child->bs;
    int new_bs_quiesce_counter;
    int drain_saldo;

    assert(!child->frozen);
-    assert(old_bs != new_bs);

    if (old_bs && new_bs) {
        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
@ -2780,9 +2679,6 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
    }

    child->bs = new_bs;
-    if (!new_bs) {
-        *childp = NULL;
-    }

    if (new_bs) {
        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
@ -2812,25 +2708,21 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
        bdrv_parent_drained_end_single(child);
        drain_saldo++;
    }
-
-    if (free_empty_child && !child->bs) {
-        bdrv_child_free(child);
-    }
 }

-/**
- * Free the given @child.
- *
- * The child must be empty (i.e. `child->bs == NULL`) and it must be
- * unused (i.e. not in a children list).
- */
-static void bdrv_child_free(BdrvChild *child)
+static void bdrv_child_free(void *opaque)
+{
+    BdrvChild *c = opaque;
+
+    g_free(c->name);
+    g_free(c);
+}
+
+static void bdrv_remove_empty_child(BdrvChild *child)
 {
    assert(!child->bs);
-    assert(!child->next.le_prev); /* not in children list */
-
-    g_free(child->name);
-    g_free(child);
+    QLIST_SAFE_REMOVE(child, next);
+    bdrv_child_free(child);
 }

 typedef struct BdrvAttachChildCommonState {
@ -2845,35 +2737,27 @@ static void bdrv_attach_child_common_abort(void *opaque)
    BdrvChild *child = *s->child;
    BlockDriverState *bs = child->bs;

-    /*
-     * Pass free_empty_child=false, because we still need the child
-     * for the AioContext operations on the parent below; those
-     * BdrvChildClass methods all work on a BdrvChild object, so we
-     * need to keep it as an empty shell (after this function, it will
-     * not be attached to any parent, and it will not have a .bs).
-     */
-    bdrv_replace_child_noperm(s->child, NULL, false);
+    bdrv_replace_child_noperm(child, NULL);

    if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
        bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
    }

    if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
-        GSList *ignore;
+        GSList *ignore = g_slist_prepend(NULL, child);

-        /* No need to ignore `child`, because it has been detached already */
-        ignore = NULL;
        child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
                                      &error_abort);
        g_slist_free(ignore);
-
-        ignore = NULL;
+        ignore = g_slist_prepend(NULL, child);
        child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
+
        g_slist_free(ignore);
    }

    bdrv_unref(bs);
-    bdrv_child_free(child);
+    bdrv_remove_empty_child(child);
+    *s->child = NULL;
 }

 static TransactionActionDrv bdrv_attach_child_common_drv = {
@ -2945,15 +2829,13 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,

        if (ret < 0) {
            error_propagate(errp, local_err);
-            bdrv_child_free(new_child);
+            bdrv_remove_empty_child(new_child);
            return ret;
        }
    }

    bdrv_ref(child_bs);
-    bdrv_replace_child_noperm(&new_child, child_bs, true);
-    /* child_bs was non-NULL, so new_child must not have been freed */
-    assert(new_child != NULL);
+    bdrv_replace_child_noperm(new_child, child_bs);

    *child = new_child;

@ -2988,12 +2870,6 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,

    assert(parent_bs->drv);

-    if (bdrv_recurse_has_child(child_bs, parent_bs)) {
-        error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
-                   child_bs->node_name, child_name, parent_bs->node_name);
-        return -EINVAL;
-    }
-
    bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
    bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
                    perm, shared_perm, &perm, &shared_perm);
@ -3005,14 +2881,21 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
        return ret;
    }

+    QLIST_INSERT_HEAD(&parent_bs->children, *child, next);
+    /*
+     * The child is unlinked again in bdrv_attach_child_common_abort(), so this
+     * list insertion needs no separate abort action.
+     */
+
    return 0;
 }

-static void bdrv_detach_child(BdrvChild **childp)
+static void bdrv_detach_child(BdrvChild *child)
 {
-    BlockDriverState *old_bs = (*childp)->bs;
+    BlockDriverState *old_bs = child->bs;

-    bdrv_replace_child_noperm(childp, NULL, true);
+    bdrv_replace_child_noperm(child, NULL);
+    bdrv_remove_empty_child(child);

    if (old_bs) {
        /*
@ -3118,7 +3001,7 @@ void bdrv_root_unref_child(BdrvChild *child)
    BlockDriverState *child_bs;

    child_bs = child->bs;
-    bdrv_detach_child(&child);
+    bdrv_detach_child(child);
    bdrv_unref(child_bs);
 }

@ -3341,8 +3224,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
    int ret;
    Transaction *tran = tran_new();

-    bdrv_drained_begin(bs);
-
    ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
    if (ret < 0) {
        goto out;
@ -3352,8 +3233,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
 out:
    tran_finalize(tran, ret);

-    bdrv_drained_end(bs);
-
    return ret;
 }

@ -4815,8 +4694,6 @@ static void bdrv_close(BlockDriverState *bs)
    bs->explicit_options = NULL;
    qobject_unref(bs->full_open_options);
    bs->full_open_options = NULL;
-    g_free(bs->block_status_cache);
-    bs->block_status_cache = NULL;

    bdrv_release_named_dirty_bitmaps(bs);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
@ -4932,7 +4809,6 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)

 typedef struct BdrvRemoveFilterOrCowChild {
    BdrvChild *child;
-    BlockDriverState *bs;
    bool is_backing;
 } BdrvRemoveFilterOrCowChild;

@ -4941,6 +4817,7 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
    BdrvRemoveFilterOrCowChild *s = opaque;
    BlockDriverState *parent_bs = s->child->opaque;

+    QLIST_INSERT_HEAD(&parent_bs->children, s->child, next);
    if (s->is_backing) {
        parent_bs->backing = s->child;
    } else {
@ -4962,19 +4839,10 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
    bdrv_child_free(s->child);
 }

-static void bdrv_remove_filter_or_cow_child_clean(void *opaque)
-{
-    BdrvRemoveFilterOrCowChild *s = opaque;
-
-    /* Drop the bs reference after the transaction is done */
-    bdrv_unref(s->bs);
-    g_free(s);
-}
-
 static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
    .abort = bdrv_remove_filter_or_cow_child_abort,
    .commit = bdrv_remove_filter_or_cow_child_commit,
-    .clean = bdrv_remove_filter_or_cow_child_clean,
+    .clean = g_free,
 };

 /*
@ -4985,41 +4853,31 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                              BdrvChild *child,
                                              Transaction *tran)
 {
-    BdrvChild **childp;
    BdrvRemoveFilterOrCowChild *s;

+    assert(child == bs->backing || child == bs->file);
+
    if (!child) {
        return;
    }

-    /*
-     * Keep a reference to @bs so @childp will stay valid throughout the
-     * transaction (required by bdrv_replace_child_tran())
-     */
-    bdrv_ref(bs);
-    if (child == bs->backing) {
-        childp = &bs->backing;
-    } else if (child == bs->file) {
-        childp = &bs->file;
-    } else {
-        g_assert_not_reached();
-    }
-
    if (child->bs) {
-        /*
-         * Pass free_empty_child=false, we will free the child in
-         * bdrv_remove_filter_or_cow_child_commit()
-         */
-        bdrv_replace_child_tran(childp, NULL, tran, false);
+        bdrv_replace_child_tran(child, NULL, tran);
    }

    s = g_new(BdrvRemoveFilterOrCowChild, 1);
    *s = (BdrvRemoveFilterOrCowChild) {
        .child = child,
-        .bs = bs,
-        .is_backing = (childp == &bs->backing),
+        .is_backing = (child == bs->backing),
    };
    tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
+
+    QLIST_SAFE_REMOVE(child, next);
+    if (s->is_backing) {
+        bs->backing = NULL;
+    } else {
+        bs->file = NULL;
+    }
 }

 /*
@ -5040,8 +4898,6 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
 {
    BdrvChild *c, *next;

-    assert(to != NULL);
-
    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->bs == from);
        if (!should_update_child(c, to)) {
@ -5057,12 +4913,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
                       c->name, from->node_name);
            return -EPERM;
        }
-
-        /*
-         * Passing a pointer to the local variable @c is fine here, because
-         * @to is not NULL, and so &c will not be attached to the transaction.
-         */
-        bdrv_replace_child_tran(&c, to, tran, true);
+        bdrv_replace_child_tran(c, to, tran);
    }

    return 0;
@ -5077,8 +4928,6 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
 *
 * With @detach_subchain=true @to must be in a backing chain of @from. In this
 * case backing link of the cow-parent of @to is removed.
- *
- * @to must not be NULL.
 */
 static int bdrv_replace_node_common(BlockDriverState *from,
                                    BlockDriverState *to,
@ -5091,8 +4940,6 @@ static int bdrv_replace_node_common(BlockDriverState *from,
    BlockDriverState *to_cow_parent = NULL;
    int ret;

-    assert(to != NULL);
-
    if (detach_subchain) {
        assert(bdrv_chain_contains(from, to));
        assert(from != to);
@ -5148,9 +4995,6 @@ out:
    return ret;
 }

-/**
- * Replace node @from by @to (where neither may be NULL).
- */
 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                      Error **errp)
 {
@ -5204,39 +5048,6 @@ out:
    return ret;
 }

-/* Not for empty child */
-int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
-                          Error **errp)
-{
-    int ret;
-    Transaction *tran = tran_new();
-    g_autoptr(GHashTable) found = NULL;
-    g_autoptr(GSList) refresh_list = NULL;
-    BlockDriverState *old_bs = child->bs;
-
-    bdrv_ref(old_bs);
-    bdrv_drained_begin(old_bs);
-    bdrv_drained_begin(new_bs);
-
-    bdrv_replace_child_tran(&child, new_bs, tran, true);
-    /* @new_bs must have been non-NULL, so @child must not have been freed */
-    assert(child != NULL);
-
-    found = g_hash_table_new(NULL, NULL);
-    refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);
-    refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs);
-
-    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
-
-    tran_finalize(tran, ret);
-
-    bdrv_drained_end(old_bs);
-    bdrv_drained_end(new_bs);
-    bdrv_unref(old_bs);
-
-    return ret;
-}
-
 static void bdrv_delete(BlockDriverState *bs)
 {
    assert(bdrv_op_blocker_is_empty(bs));
@ -5253,61 +5064,29 @@ static void bdrv_delete(BlockDriverState *bs)
    g_free(bs);
 }

-
-/*
- * Replace @bs by newly created block node.
- *
- * @options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict belongs to the block layer
- * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use qobject_ref() before calling bdrv_open.
- */
-BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
                                   int flags, Error **errp)
 {
-    ERRP_GUARD();
-    int ret;
-    BlockDriverState *new_node_bs = NULL;
-    const char *drvname, *node_name;
-    BlockDriver *drv;
+    BlockDriverState *new_node_bs;
+    Error *local_err = NULL;

-    drvname = qdict_get_try_str(options, "driver");
-    if (!drvname) {
-        error_setg(errp, "driver is not specified");
-        goto fail;
-    }
-
-    drv = bdrv_find_format(drvname);
-    if (!drv) {
-        error_setg(errp, "Unknown driver: '%s'", drvname);
-        goto fail;
-    }
-
-    node_name = qdict_get_try_str(options, "node-name");
-
-    new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
-                                            errp);
-    options = NULL; /* bdrv_new_open_driver() eats options */
-    if (!new_node_bs) {
+    new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+    if (new_node_bs == NULL) {
        error_prepend(errp, "Could not create node: ");
-        goto fail;
+        return NULL;
    }

    bdrv_drained_begin(bs);
-    ret = bdrv_replace_node(bs, new_node_bs, errp);
+    bdrv_replace_node(bs, new_node_bs, &local_err);
    bdrv_drained_end(bs);

-    if (ret < 0) {
-        error_prepend(errp, "Could not replace node: ");
-        goto fail;
+    if (local_err) {
+        bdrv_unref(new_node_bs);
+        error_propagate(errp, local_err);
+        return NULL;
    }

    return new_node_bs;
-
-fail:
-    qobject_unref(options);
-    bdrv_unref(new_node_bs);
-    return NULL;
 }

 /*
@ -5547,6 +5326,8 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
    update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);

    /* success - we can delete the intermediate states, and link top->base */
+    /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
+     * we've figured out how they should work. */
    if (!backing_file_str) {
        bdrv_refresh_filename(base);
        backing_file_str = base->filename;
@ -6507,7 +6288,6 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
 {
    BdrvChild *child, *parent;
    int ret;
-    uint64_t cumulative_perms, cumulative_shared_perms;

    if (!bs->drv) {
        return -ENOMEDIUM;
@ -6538,13 +6318,6 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
        }
    }

-    bdrv_get_cumulative_perm(bs, &cumulative_perms,
-                             &cumulative_shared_perms);
-    if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
-        /* Our inactive parents still need write access. Inactivation failed. */
-        return -EPERM;
-    }
-
    bs->open_flags |= BDRV_O_INACTIVE;

    /*
@ -7476,7 +7249,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
 /* Note: This function may return false positives; it may return true
 * even if opening the backing file specified by bs's image header
 * would result in exactly bs->backing. */
-static bool bdrv_backing_overridden(BlockDriverState *bs)
+bool bdrv_backing_overridden(BlockDriverState *bs)
 {
    if (bs->backing) {
        return strcmp(bs->auto_backing_file,
@ -7880,76 +7653,3 @@ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
 {
    return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
 }
-
-/**
- * Check whether [offset, offset + bytes) overlaps with the cached
- * block-status data region.
- *
- * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
- * which is what bdrv_bsc_is_data()'s interface needs.
- * Otherwise, *pnum is not touched.
- */
-static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
-                                           int64_t offset, int64_t bytes,
-                                           int64_t *pnum)
-{
-    BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
-    bool overlaps;
-
-    overlaps =
-        qatomic_read(&bsc->valid) &&
-        ranges_overlap(offset, bytes, bsc->data_start,
-                       bsc->data_end - bsc->data_start);
-
-    if (overlaps && pnum) {
-        *pnum = bsc->data_end - offset;
-    }
-
-    return overlaps;
-}
-
-/**
- * See block_int.h for this function's documentation.
- */
-bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
-{
-    RCU_READ_LOCK_GUARD();
-
-    return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
-}
-
-/**
- * See block_int.h for this function's documentation.
- */
-void bdrv_bsc_invalidate_range(BlockDriverState *bs,
-                               int64_t offset, int64_t bytes)
-{
-    RCU_READ_LOCK_GUARD();
-
-    if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
-        qatomic_set(&bs->block_status_cache->valid, false);
-    }
-}
-
-/**
- * See block_int.h for this function's documentation.
- */
-void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
-{
-    BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
-    BdrvBlockStatusCache *old_bsc;
-
-    *new_bsc = (BdrvBlockStatusCache) {
-        .valid = true,
-        .data_start = offset,
-        .data_end = offset + bytes,
-    };
-
-    QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
-
-    old_bsc = qatomic_rcu_read(&bs->block_status_cache);
-    qatomic_rcu_set(&bs->block_status_cache, new_bsc);
-    if (old_bsc) {
-        g_free_rcu(old_bsc, rcu);
-    }
-}
--- a/block/aio_task.c
+++ b/block/aio_task.c
@ -98,8 +98,6 @@ AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks)
 {
    AioTaskPool *pool = g_new0(AioTaskPool, 1);

-    assert(max_busy_tasks > 0);
-
    pool->main_co = qemu_coroutine_self();
    pool->max_busy_tasks = max_busy_tasks;

--- a/block/backup-top.c
+++ b/block/backup-top.c
@ -0,0 +1,253 @@
+/*
+ * backup-top filter driver
+ *
+ * The driver performs Copy-Before-Write (CBW) operation: it is injected above
+ * some node, and before each write it copies _old_ data to the target node.
+ *
+ * Copyright (c) 2018-2019 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/qdict.h"
+#include "block/block-copy.h"
+
+#include "block/backup-top.h"
+
+typedef struct BDRVBackupTopState {
+    BlockCopyState *bcs;    /* handed to block_copy(); created on append */
+    BdrvChild *target;      /* the "target" child attached on append */
+    int64_t cluster_size;   /* copy ranges are aligned to this size */
+} BDRVBackupTopState;
+
+static coroutine_fn int backup_top_co_preadv(
+        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+        QEMUIOVector *qiov, int flags)
+{   /* Reads go to the backing child with all arguments unchanged. */
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVBackupTopState *s = bs->opaque;
+    uint64_t off, end;
+    /* Requests flagged BDRV_REQ_WRITE_UNCHANGED skip the copy entirely. */
+    if (flags & BDRV_REQ_WRITE_UNCHANGED) {
+        return 0;
+    }
+    /* Widen [offset, offset + bytes) to whole clusters before copying. */
+    off = QEMU_ALIGN_DOWN(offset, s->cluster_size);
+    end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size);
+    /* The result of block_copy() is returned to the caller as is. */
+    return block_copy(s->bcs, off, end - off, true);
+}
+
+static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs,
+                                               int64_t offset, int bytes)
+{
+    int ret = backup_top_cbw(bs, offset, bytes, 0); /* copy old data first */
+    if (ret < 0) {
+        return ret;
+    }
+    /* No copy error: forward the discard to the backing child. */
+    return bdrv_co_pdiscard(bs->backing, offset, bytes);
+}
+
+static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs,
+        int64_t offset, int bytes, BdrvRequestFlags flags)
+{
+    int ret = backup_top_cbw(bs, offset, bytes, flags); /* copy step first */
+    if (ret < 0) {
+        return ret;
+    }
+    /* No copy error: forward the zero-write to the backing child. */
+    return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+}
+
+static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs,
+                                              uint64_t offset,
+                                              uint64_t bytes,
+                                              QEMUIOVector *qiov, int flags)
+{
+    int ret = backup_top_cbw(bs, offset, bytes, flags); /* copy step first */
+    if (ret < 0) {
+        return ret;
+    }
+    /* No copy error: forward the write to the backing child. */
+    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn backup_top_co_flush(BlockDriverState *bs)
+{
+    if (!bs->backing) {    /* no backing child: nothing to flush */
+        return 0;
+    }
+    /* Only the backing child's node is flushed by this callback. */
+    return bdrv_co_flush(bs->backing->bs);
+}
+
+static void backup_top_refresh_filename(BlockDriverState *bs)
+{
+    if (bs->backing == NULL) {
+        /*
+         * Can be reached after a failed bdrv_attach_child() in
+         * bdrv_set_backing_hd(); exact_filename is left untouched.
+         */
+        return;
+    }
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            bs->backing->bs->filename);
+}
+
+static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                  BdrvChildRole role,
+                                  BlockReopenQueue *reopen_queue,
+                                  uint64_t perm, uint64_t shared,
+                                  uint64_t *nperm, uint64_t *nshared)
+{
+    if (!(role & BDRV_CHILD_FILTERED)) {
+        /*
+         * Target child
+         *
+         * Share everything except resize: writes stay shared so that guest
+         * writes to its disk, which may be in the target's backing chain,
+         * keep working; resize is not shared because the size is only
+         * checked upfront. Only the write permission is requested.
+         */
+        *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
+        *nperm = BLK_PERM_WRITE;
+    } else {
+        /* Source child: start from the default permissions, then adjust. */
+        bdrv_default_perms(bs, c, role, reopen_queue,
+                           perm, shared, nperm, nshared);
+        /* When a parent may write, also request consistent reads. */
+        if (perm & BLK_PERM_WRITE) {
+            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+        }
+        *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); /* never shared */
+    }
+}
+
+BlockDriver bdrv_backup_top_filter = {
+    .format_name = "backup-top",
+    .instance_size = sizeof(BDRVBackupTopState),
+    /* Reads pass through; writes, zero-writes and discards copy first. */
+    .bdrv_co_preadv             = backup_top_co_preadv,
+    .bdrv_co_pwritev            = backup_top_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = backup_top_co_pwrite_zeroes,
+    .bdrv_co_pdiscard           = backup_top_co_pdiscard,
+    .bdrv_co_flush              = backup_top_co_flush,
+
+    .bdrv_refresh_filename      = backup_top_refresh_filename,
+
+    .bdrv_child_perm            = backup_top_child_perm,
+
+    .is_filter = true,
+};
+
+BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
+                                         BlockDriverState *target,
+                                         const char *filter_node_name,
+                                         uint64_t cluster_size,
+                                         BackupPerf *perf,
+                                         BdrvRequestFlags write_flags,
+                                         BlockCopyState **bcs,
+                                         Error **errp)
+{
+    ERRP_GUARD();
+    int ret;
+    BDRVBackupTopState *state;
+    BlockDriverState *top;
+    bool appended = false;
+    /* Source and target are required to have the same length. */
+    assert(source->total_sectors == target->total_sectors);
+
+    top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name,
+                               BDRV_O_RDWR, errp);
+    if (!top) {
+        return NULL;
+    }
+    /* Take the size and the listed pass-through flags from the source. */
+    state = top->opaque;
+    top->total_sectors = source->total_sectors;
+    top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+            (BDRV_REQ_FUA & source->supported_write_flags);
+    top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+            ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+             source->supported_zero_flags);
+    /* Extra target reference; dropped again below if attaching fails. */
+    bdrv_ref(target);
+    state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
+                                      BDRV_CHILD_DATA, errp);
+    if (!state->target) {
+        bdrv_unref(target);
+        bdrv_unref(top);
+        return NULL;
+    }
+    /* The source stays drained until setup has succeeded or been undone. */
+    bdrv_drained_begin(source);
+
+    ret = bdrv_append(top, source, errp);
+    if (ret < 0) {
+        error_prepend(errp, "Cannot append backup-top filter: ");
+        goto fail;
+    }
+    appended = true;
+    /* Create the block-copy state for the backing and target children. */
+    state->cluster_size = cluster_size;
+    state->bcs = block_copy_state_new(top->backing, state->target,
+                                      cluster_size, perf->use_copy_range,
+                                      write_flags, errp);
+    if (!state->bcs) {
+        error_prepend(errp, "Cannot create block-copy-state: ");
+        goto fail;
+    }
+    *bcs = state->bcs;    /* also handed back through @bcs */
+
+    bdrv_drained_end(source);
+
+    return top;
+
+fail:
+    if (appended) {    /* bdrv_append() succeeded, so undo it as well */
+        bdrv_backup_top_drop(top);
+    } else {
+        bdrv_unref(top);
+    }
+
+    bdrv_drained_end(source);
+
+    return NULL;
+}
+
+void bdrv_backup_top_drop(BlockDriverState *bs)
+{
+    BDRVBackupTopState *s = bs->opaque;
+    /* A failure to drop the filter is fatal (&error_abort). */
+    bdrv_drop_filter(bs, &error_abort);
+
+    block_copy_state_free(s->bcs);
+    /* Finally give up a reference to the filter node itself. */
+    bdrv_unref(bs);
+}
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@ -1,10 +1,10 @@
 /*
- * copy-before-write filter driver
+ * backup-top filter driver
 *
 * The driver performs Copy-Before-Write (CBW) operation: it is injected above
 * some node, and before each write it copies _old_ data to the target node.
 *
- * Copyright (c) 2018-2021 Virtuozzo International GmbH.
+ * Copyright (c) 2018-2019 Virtuozzo International GmbH.
 *
 * Author:
 *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
@ -23,17 +23,20 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef COPY_BEFORE_WRITE_H
-#define COPY_BEFORE_WRITE_H
+#ifndef BACKUP_TOP_H
+#define BACKUP_TOP_H

 #include "block/block_int.h"
 #include "block/block-copy.h"

-BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                  BlockDriverState *target,
-                                  const char *filter_node_name,
-                                  BlockCopyState **bcs,
-                                  Error **errp);
-void bdrv_cbw_drop(BlockDriverState *bs);
+BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
+                                         BlockDriverState *target,
+                                         const char *filter_node_name,
+                                         uint64_t cluster_size,
+                                         BackupPerf *perf,
+                                         BdrvRequestFlags write_flags,
+                                         BlockCopyState **bcs,
+                                         Error **errp);
+void bdrv_backup_top_drop(BlockDriverState *bs);

-#endif /* COPY_BEFORE_WRITE_H */
+#endif /* BACKUP_TOP_H */
--- a/block/backup.c
+++ b/block/backup.c
@ -27,11 +27,13 @@
 #include "qemu/bitmap.h"
 #include "qemu/error-report.h"

-#include "block/copy-before-write.h"
+#include "block/backup-top.h"
+
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)

 typedef struct BackupBlockJob {
    BlockJob common;
-    BlockDriverState *cbw;
+    BlockDriverState *backup_top;
    BlockDriverState *source_bs;
    BlockDriverState *target_bs;

@ -102,7 +104,7 @@ static void backup_clean(Job *job)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    block_job_remove_all_bdrv(&s->common);
-    bdrv_cbw_drop(s->cbw);
+    bdrv_backup_top_drop(s->backup_top);
 }

 void backup_do_checkpoint(BlockJob *job, Error **errp)
@ -233,16 +235,18 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
    BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);

    if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
-        bdrv_clear_dirty_bitmap(bcs_bitmap, NULL);
        ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
                                               NULL, true);
        assert(ret);
-    } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-        /*
-         * We can't hog the coroutine to initialize this thoroughly.
-         * Set a flag and resume work when we are able to yield safely.
-         */
-        block_copy_set_skip_unallocated(job->bcs, true);
+    } else {
+        if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
+            /*
+             * We can't hog the coroutine to initialize this thoroughly.
+             * Set a flag and resume work when we are able to yield safely.
+             */
+            block_copy_set_skip_unallocated(job->bcs, true);
+        }
+        bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
    }

    estimate = bdrv_get_dirty_count(bcs_bitmap);
@ -327,12 +331,11 @@ static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
    }
 }

-static bool backup_cancel(Job *job, bool force)
+static void backup_cancel(Job *job, bool force)
 {
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);

    bdrv_cancel_in_flight(s->target_bs);
-    return true;
 }

 static const BlockJobDriver backup_job_driver = {
@ -351,6 +354,43 @@ static const BlockJobDriver backup_job_driver = {
    .set_speed = backup_set_speed,
 };

+static int64_t backup_calculate_cluster_size(BlockDriverState *target,
+                                             Error **errp)
+{
+    int ret;
+    BlockDriverInfo bdi;
+    bool target_does_cow = bdrv_backing_chain_next(target);
+
+    /*
+     * If there is no backing file on the target, we cannot rely on COW if our
+     * backup cluster size is smaller than the target cluster size. Even for
+     * targets with a backing file, try to avoid COW if possible.
+     */
+    ret = bdrv_get_info(target, &bdi);
+    if (ret == -ENOTSUP && !target_does_cow) {
+        /* Cluster size is not defined */
+        warn_report("The target block device doesn't provide "
+                    "information about the block size and it doesn't have a "
+                    "backing file. The default block size of %u bytes is "
+                    "used. If the actual block size of the target exceeds "
+                    "this default, the backup may be unusable",
+                    BACKUP_CLUSTER_SIZE_DEFAULT);
+        return BACKUP_CLUSTER_SIZE_DEFAULT;
+    } else if (ret < 0 && !target_does_cow) {
+        error_setg_errno(errp, -ret,
+            "Couldn't determine the cluster size of the target image, "
+            "which has no backing file");
+        error_append_hint(errp,
+            "Aborting, since this may create an unusable destination image\n");
+        return ret;
+    } else if (ret < 0 && target_does_cow) {
+        /* Not fatal; just trudge on ahead. */
+        return BACKUP_CLUSTER_SIZE_DEFAULT;
+    }
+    /* bdrv_get_info() succeeded: never go below the default size. */
+    return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+}
+
 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@ -367,7 +407,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    int64_t len, target_len;
    BackupBlockJob *job = NULL;
    int64_t cluster_size;
-    BlockDriverState *cbw = NULL;
+    BdrvRequestFlags write_flags;
+    BlockDriverState *backup_top = NULL;
    BlockCopyState *bcs = NULL;

    assert(bs);
@ -408,8 +449,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        return NULL;
    }

-    if (perf->max_workers < 1 || perf->max_workers > INT_MAX) {
-        error_setg(errp, "max-workers must be between 1 and %d", INT_MAX);
+    cluster_size = backup_calculate_cluster_size(target, errp);
+    if (cluster_size < 0) {
+        goto error;
+    }
+
+    if (perf->max_workers < 1) {
+        error_setg(errp, "max-workers must be greater than zero");
        return NULL;
    }

@ -419,6 +465,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        return NULL;
    }

+    if (perf->max_chunk && perf->max_chunk < cluster_size) {
+        error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
+                   "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size);
+        return NULL;
+    }
+
+
    if (sync_bitmap) {
        /* If we need to write to this bitmap, check that we can: */
        if (bitmap_mode != BITMAP_SYNC_MODE_NEVER &&
@ -451,28 +504,39 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
        goto error;
    }

-    cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
-    if (!cbw) {
-        goto error;
-    }
+    /*
+     * If source is in backing chain of target assume that target is going to be
+     * used for "image fleecing", i.e. it should represent a kind of snapshot of
+     * source at backup-start point in time. And target is going to be read by
+     * somebody (for example, used as NBD export) during backup job.
+     *
+     * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
+     * intersection of backup writes and third party reads from target;
+     * otherwise a read from target may occasionally return data that has
+     * already been updated by the guest.
+     *
+     * BDRV_REQ_WRITE_COMPRESSED is added on top when @compress is set.
+     *
+     * For more information see commit f8d59dfb40bb and test
+     * tests/qemu-iotests/222
+     */
+    write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
+                  (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);

-    cluster_size = block_copy_cluster_size(bcs);
-
-    if (perf->max_chunk && perf->max_chunk < cluster_size) {
-        error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
-                   "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size);
+    backup_top = bdrv_backup_top_append(bs, target, filter_node_name,
+                                        cluster_size, perf,
+                                        write_flags, &bcs, errp);
+    if (!backup_top) {
        goto error;
    }

    /* job->len is fixed, so we can't allow resize */
-    job = block_job_create(job_id, &backup_job_driver, txn, cbw,
+    job = block_job_create(job_id, &backup_job_driver, txn, backup_top,
                           0, BLK_PERM_ALL,
                           speed, creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }

-    job->cbw = cbw;
+    job->backup_top = backup_top;
    job->source_bs = bs;
    job->target_bs = target;
    job->on_source_error = on_source_error;
@ -485,11 +549,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    job->len = len;
    job->perf = *perf;

-    block_copy_set_copy_opts(bcs, perf->use_copy_range, compress);
    block_copy_set_progress_meter(bcs, &job->common.job.progress);
    block_copy_set_speed(bcs, speed);

-    /* Required permissions are taken by copy-before-write filter target */
+    /* Required permissions are already taken by backup-top target */
    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
                       &error_abort);

@ -499,8 +562,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL);
    }
-    if (cbw) {
-        bdrv_cbw_drop(cbw);
+    if (backup_top) {
+        bdrv_backup_top_drop(backup_top);
    }

    return NULL;
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@ -631,8 +631,8 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 }

 static int coroutine_fn
-blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                   QEMUIOVector *qiov, BdrvRequestFlags flags)
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                   QEMUIOVector *qiov, int flags)
 {
    int err;

@ -652,8 +652,8 @@ blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 }

 static int coroutine_fn
-blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                    QEMUIOVector *qiov, BdrvRequestFlags flags)
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
    int err;

@ -684,7 +684,7 @@ static int blkdebug_co_flush(BlockDriverState *bs)
 }

 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int64_t bytes,
+                                                  int64_t offset, int bytes,
                                                  BdrvRequestFlags flags)
 {
    uint32_t align = MAX(bs->bl.request_alignment,
@ -717,7 +717,7 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes)
+                                             int64_t offset, int bytes)
 {
    uint32_t align = bs->bl.pdiscard_alignment;
    int err;
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@ -301,8 +301,8 @@ static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
 }

 static int coroutine_fn
-blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                         QEMUIOVector *qiov, BdrvRequestFlags flags)
+blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                         QEMUIOVector *qiov, int flags)
 {
    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
@ -460,16 +460,16 @@ blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
 }

 static int coroutine_fn
-blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                          QEMUIOVector *qiov, BdrvRequestFlags flags)
+blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                          QEMUIOVector *qiov, int flags)
 {
    return blk_log_writes_co_log(bs, offset, bytes, qiov, flags,
                                 blk_log_writes_co_do_file_pwritev, 0, false);
 }

 static int coroutine_fn
-blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int64_t bytes, BdrvRequestFlags flags)
+blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
+                                BdrvRequestFlags flags)
 {
    return blk_log_writes_co_log(bs, offset, bytes, NULL, flags,
                                 blk_log_writes_co_do_file_pwrite_zeroes, 0,
@ -484,9 +484,9 @@ static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
 }

 static int coroutine_fn
-blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
-    return blk_log_writes_co_log(bs, offset, bytes, NULL, 0,
+    return blk_log_writes_co_log(bs, offset, count, NULL, 0,
                                 blk_log_writes_co_do_file_pdiscard,
                                 LOG_DISCARD_FLAG, false);
 }
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@ -72,7 +72,7 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    uint64_t reqid = blkreplay_next_id();
    int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@ -83,7 +83,7 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    uint64_t reqid = blkreplay_next_id();
    int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@ -94,7 +94,7 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    uint64_t reqid = blkreplay_next_id();
    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
@ -105,7 +105,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int64_t bytes)
+                                              int64_t offset, int bytes)
 {
    uint64_t reqid = blkreplay_next_id();
    int ret = bdrv_co_pdiscard(bs->file, offset, bytes);
--- a/block/blkverify.c
+++ b/block/blkverify.c
@ -221,8 +221,8 @@ blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
 }

 static int coroutine_fn
-blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                    QEMUIOVector *qiov, BdrvRequestFlags flags)
+blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
    BlkverifyRequest r;
    QEMUIOVector raw_qiov;
@ -250,8 +250,8 @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 }

 static int coroutine_fn
-blkverify_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                     QEMUIOVector *qiov, BdrvRequestFlags flags)
+blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                     QEMUIOVector *qiov, int flags)
 {
    BlkverifyRequest r;
    return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
--- a/block/block-backend.c
+++ b/block/block-backend.c
@ -14,7 +14,6 @@
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
-#include "block/coroutines.h"
 #include "block/throttle-groups.h"
 #include "hw/qdev-core.h"
 #include "sysemu/blockdev.h"
@ -190,7 +189,6 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
 {
    BlockBackend *blk = child->opaque;
    Error *local_err = NULL;
-    uint64_t saved_shared_perm;

    if (!blk->disable_perm) {
        return;
@ -198,22 +196,12 @@ static void blk_root_activate(BdrvChild *child, Error **errp)

    blk->disable_perm = false;

-    /*
-     * blk->shared_perm contains the permissions we want to share once
-     * migration is really completely done.  For now, we need to share
-     * all; but we also need to retain blk->shared_perm, which is
-     * overwritten by a successful blk_set_perm() call.  Save it and
-     * restore it below.
-     */
-    saved_shared_perm = blk->shared_perm;
-
    blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        blk->disable_perm = true;
        return;
    }
-    blk->shared_perm = saved_shared_perm;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* Activation can happen when migration process is still active, for
@ -833,22 +821,16 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
 void blk_remove_bs(BlockBackend *blk)
 {
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+    BlockDriverState *bs;
    BdrvChild *root;

    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    if (tgm->throttle_state) {
-        BlockDriverState *bs = blk_bs(blk);
-
-        /*
-         * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
-         * example, if a temporary filter node is removed by a blockjob.
-         */
-        bdrv_ref(bs);
+        bs = blk_bs(blk);
        bdrv_drained_begin(bs);
        throttle_group_detach_aio_context(tgm);
        throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
        bdrv_drained_end(bs);
-        bdrv_unref(bs);
    }

    blk_update_root_state(blk);
@ -887,14 +869,6 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
    return 0;
 }

-/*
- * Change BlockDriverState associated with @blk.
- */
-int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
-{
-    return bdrv_replace_child_bs(blk->root, new_bs, errp);
-}
-
 /*
 * Sets the permission bitmasks that the user of the BlockBackend needs.
 */
@ -1179,11 +1153,11 @@ void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
 }

 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
-                                  int64_t bytes)
+                                  size_t size)
 {
    int64_t len;

-    if (bytes < 0) {
+    if (size > INT_MAX) {
        return -EIO;
    }

@ -1201,7 +1175,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
            return len;
        }

-        if (offset > len || len - offset < bytes) {
+        if (offset > len || len - offset < size) {
            return -EIO;
        }
    }
@ -1222,9 +1196,9 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 }

 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
-int coroutine_fn
-blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
-                 QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn
+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
    int ret;
    BlockDriverState *bs;
@ -1254,23 +1228,23 @@ blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
 }

 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                               int64_t bytes, QEMUIOVector *qiov,
+                               unsigned int bytes, QEMUIOVector *qiov,
                               BdrvRequestFlags flags)
 {
    int ret;

    blk_inc_in_flight(blk);
-    ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags);
+    ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
    blk_dec_in_flight(blk);

    return ret;
 }

 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
-int coroutine_fn
-blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
-                       QEMUIOVector *qiov, size_t qiov_offset,
-                       BdrvRequestFlags flags)
+static int coroutine_fn
+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
+                    QEMUIOVector *qiov, size_t qiov_offset,
+                    BdrvRequestFlags flags)
 {
    int ret;
    BlockDriverState *bs;
@ -1304,33 +1278,12 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
 }

 int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
-                                     int64_t bytes,
+                                     unsigned int bytes,
                                     QEMUIOVector *qiov, size_t qiov_offset,
                                     BdrvRequestFlags flags)
 {
    int ret;

-    blk_inc_in_flight(blk);
-    ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
-    blk_dec_in_flight(blk);
-
-    return ret;
-}
-
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                                int64_t bytes, QEMUIOVector *qiov,
-                                BdrvRequestFlags flags)
-{
-    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
-}
-
-static int coroutine_fn blk_pwritev_part(BlockBackend *blk, int64_t offset,
-                                         int64_t bytes,
-                                         QEMUIOVector *qiov, size_t qiov_offset,
-                                         BdrvRequestFlags flags)
-{
-    int ret;
-
    blk_inc_in_flight(blk);
    ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
    blk_dec_in_flight(blk);
@ -1338,6 +1291,13 @@ static int coroutine_fn blk_pwritev_part(BlockBackend *blk, int64_t offset,
    return ret;
 }

+/*
+ * Write @bytes from @qiov to @blk at @offset with the given @flags.
+ *
+ * Thin wrapper: forwards to blk_co_pwritev_part() with a qiov offset of 0
+ * and returns its result unchanged.
+ */
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                                unsigned int bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags)
+{
+    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
+}
+
 typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
@ -1346,11 +1306,58 @@ typedef struct BlkRwCo {
    BdrvRequestFlags flags;
 } BlkRwCo;

-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                      int64_t bytes, BdrvRequestFlags flags)
+static void blk_read_entry(void *opaque)
 {
-    return blk_pwritev_part(blk, offset, bytes, NULL, 0,
-                            flags | BDRV_REQ_ZERO_WRITE);
+    BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
+                              qiov, rwco->flags);
+    aio_wait_kick();
+}
+
+/*
+ * Coroutine entry used as the co_entry of blk_prw() by the write wrappers in
+ * this file (blk_pwrite(), blk_pwrite_zeroes(), blk_pwrite_compressed()).
+ *
+ * @opaque is the BlkRwCo describing the request; the result of the write is
+ * stored in its ret field.
+ */
+static void blk_write_entry(void *opaque)
+{
+    BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    /* Request length comes from the qiov; the write starts at qiov offset 0 */
+    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
+                                    qiov, 0, rwco->flags);
+    /* Presumably wakes the BDRV_POLL_WHILE() waiter in blk_prw() - confirm */
+    aio_wait_kick();
+}
+
+/*
+ * Common helper behind the non-coroutine wrappers in this file: package the
+ * arguments into a BlkRwCo, run @co_entry on it, wait until it has stored a
+ * result, and return that result.
+ *
+ * @buf and @bytes are wrapped in a QEMUIOVector that @co_entry receives via
+ * rwco.iobuf. Callers in this file pass a NULL @buf when there is no data
+ * buffer (blk_pwrite_zeroes(), blk_pdiscard(), blk_flush()).
+ *
+ * Returns whatever @co_entry wrote to rwco.ret.
+ */
+static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
+                   int64_t bytes, CoroutineEntry co_entry,
+                   BdrvRequestFlags flags)
+{
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+    BlkRwCo rwco = {
+        .blk    = blk,
+        .offset = offset,
+        .iobuf  = &qiov,
+        .flags  = flags,
+        /* Sentinel polled below; co_entry replaces it with the real result */
+        .ret    = NOT_DONE,
+    };
+
+    /* The in-flight counter is held across the whole request */
+    blk_inc_in_flight(blk);
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        co_entry(&rwco);
+    } else {
+        /* Otherwise start a coroutine and poll until a result is stored */
+        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+        bdrv_coroutine_enter(blk_bs(blk), co);
+        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+    }
+    blk_dec_in_flight(blk);
+
+    return rwco.ret;
+}
+
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                      int bytes, BdrvRequestFlags flags)
+{
+    return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
+                   flags | BDRV_REQ_ZERO_WRITE);
 }

 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
@ -1397,7 +1404,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
 typedef struct BlkAioEmAIOCB {
    BlockAIOCB common;
    BlkRwCo rwco;
-    int64_t bytes;
+    int bytes;
    bool has_returned;
 } BlkAioEmAIOCB;

@ -1429,8 +1436,7 @@ static void blk_aio_complete_bh(void *opaque)
    blk_aio_complete(acb);
 }

-static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
-                                int64_t bytes,
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
                                void *iobuf, CoroutineEntry co_entry,
                                BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
@ -1469,8 +1475,8 @@ static void blk_aio_read_entry(void *opaque)
    QEMUIOVector *qiov = rwco->iobuf;

    assert(qiov->size == acb->bytes);
-    rwco->ret = blk_co_do_preadv(rwco->blk, rwco->offset, acb->bytes,
-                                 qiov, rwco->flags);
+    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
+                              qiov, rwco->flags);
    blk_aio_complete(acb);
 }

@ -1481,40 +1487,37 @@ static void blk_aio_write_entry(void *opaque)
    QEMUIOVector *qiov = rwco->iobuf;

    assert(!qiov || qiov->size == acb->bytes);
-    rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
-                                       qiov, 0, rwco->flags);
+    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
+                                    qiov, 0, rwco->flags);
    blk_aio_complete(acb);
 }

 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                  int64_t bytes, BdrvRequestFlags flags,
+                                  int count, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque)
 {
-    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
+    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
 }

-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes)
+int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
 {
-    int ret;
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    blk_inc_in_flight(blk);
-    ret = blk_do_preadv(blk, offset, bytes, &qiov, 0);
-    blk_dec_in_flight(blk);
-
-    return ret < 0 ? ret : bytes;
+    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
+    if (ret < 0) {
+        return ret;
+    }
+    return count;
 }

-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
               BdrvRequestFlags flags)
 {
-    int ret;
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags);
-
-    return ret < 0 ? ret : bytes;
+    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+                      flags);
+    if (ret < 0) {
+        return ret;
+    }
+    return count;
 }

 int64_t blk_getlength(BlockBackend *blk)
@ -1548,7 +1551,6 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                           QEMUIOVector *qiov, BdrvRequestFlags flags,
                           BlockCompletionFunc *cb, void *opaque)
 {
-    assert((uint64_t)qiov->size <= INT64_MAX);
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_read_entry, flags, cb, opaque);
 }
@ -1557,7 +1559,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
 {
-    assert((uint64_t)qiov->size <= INT64_MAX);
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_write_entry, flags, cb, opaque);
 }
@ -1573,8 +1574,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
 }

 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
-int coroutine_fn
-blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+static int coroutine_fn
+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
    blk_wait_while_drained(blk);

@ -1585,15 +1586,18 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
    return bdrv_co_ioctl(blk_bs(blk), req, buf);
 }

+/*
+ * Coroutine entry used by blk_ioctl() through blk_prw().
+ *
+ * The BlkRwCo fields are reused for the ioctl: blk_ioctl() passes the request
+ * number as the offset and the argument buffer as the data buffer, so
+ * rwco->offset is the request and the first iovec's base pointer is the
+ * caller's buffer. The result is stored in rwco->ret.
+ */
+static void blk_ioctl_entry(void *opaque)
+{
+    BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
+    /* Presumably wakes the BDRV_POLL_WHILE() waiter in blk_prw() - confirm */
+    aio_wait_kick();
+}
+
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
-    int ret;
-
-    blk_inc_in_flight(blk);
-    ret = blk_do_ioctl(blk, req, buf);
-    blk_dec_in_flight(blk);
-
-    return ret;
+    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
 }

 static void blk_aio_ioctl_entry(void *opaque)
@ -1601,7 +1605,7 @@ static void blk_aio_ioctl_entry(void *opaque)
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

-    rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);

    blk_aio_complete(acb);
 }
@ -1613,8 +1617,8 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
 }

 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
-int coroutine_fn
-blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
+static int coroutine_fn
+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
    int ret;

@ -1633,31 +1637,19 @@ static void blk_aio_pdiscard_entry(void *opaque)
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

-    rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
    blk_aio_complete(acb);
 }

 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
-                             int64_t offset, int64_t bytes,
+                             int64_t offset, int bytes,
                             BlockCompletionFunc *cb, void *opaque)
 {
    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
 }

-int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
-                                 int64_t bytes)
-{
-    int ret;
-
-    blk_inc_in_flight(blk);
-    ret = blk_co_do_pdiscard(blk, offset, bytes);
-    blk_dec_in_flight(blk);
-
-    return ret;
-}
-
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
    int ret;

@ -1668,8 +1660,22 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
    return ret;
 }

+/*
+ * Coroutine entry used by blk_pdiscard() through blk_prw().
+ *
+ * blk_pdiscard() passes a NULL buffer together with the byte count, so the
+ * discard length is read back from qiov->size. The result is stored in
+ * rwco->ret.
+ */
+static void blk_pdiscard_entry(void *opaque)
+{
+    BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
+    /* Presumably wakes the BDRV_POLL_WHILE() waiter in blk_prw() - confirm */
+    aio_wait_kick();
+}
+
+/*
+ * Discard @bytes starting at @offset on @blk.
+ *
+ * Runs blk_pdiscard_entry() via blk_prw() with no data buffer and no flags,
+ * and returns blk_prw()'s result.
+ */
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+{
+    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+}
+
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
-int coroutine_fn blk_co_do_flush(BlockBackend *blk)
+static int coroutine_fn blk_do_flush(BlockBackend *blk)
 {
    blk_wait_while_drained(blk);

@ -1685,7 +1691,7 @@ static void blk_aio_flush_entry(void *opaque)
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

-    rwco->ret = blk_co_do_flush(rwco->blk);
+    rwco->ret = blk_do_flush(rwco->blk);
    blk_aio_complete(acb);
 }

@ -1700,21 +1706,22 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
    int ret;

    blk_inc_in_flight(blk);
-    ret = blk_co_do_flush(blk);
+    ret = blk_do_flush(blk);
    blk_dec_in_flight(blk);

    return ret;
 }

+/*
+ * Coroutine entry used by blk_flush() through blk_prw().
+ *
+ * Only rwco->blk is read; the result of blk_do_flush() is stored in
+ * rwco->ret.
+ */
+static void blk_flush_entry(void *opaque)
+{
+    BlkRwCo *rwco = opaque;
+    rwco->ret = blk_do_flush(rwco->blk);
+    /* Presumably wakes the BDRV_POLL_WHILE() waiter in blk_prw() - confirm */
+    aio_wait_kick();
+}
+
 int blk_flush(BlockBackend *blk)
 {
-    int ret;
-
-    blk_inc_in_flight(blk);
-    ret = blk_do_flush(blk);
-    blk_dec_in_flight(blk);
-
-    return ret;
+    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
 }

 void blk_drain(BlockBackend *blk)
@ -1722,7 +1729,6 @@ void blk_drain(BlockBackend *blk)
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
-        bdrv_ref(bs);
        bdrv_drained_begin(bs);
    }

@ -1732,7 +1738,6 @@ void blk_drain(BlockBackend *blk)

    if (bs) {
        bdrv_drained_end(bs);
-        bdrv_unref(bs);
    }
 }

@ -2063,13 +2068,10 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
    int ret;

    if (bs) {
-        bdrv_ref(bs);
-
        if (update_root_node) {
            ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
                                                 errp);
            if (ret < 0) {
-                bdrv_unref(bs);
                return ret;
            }
        }
@ -2079,8 +2081,6 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
            throttle_group_attach_aio_context(tgm, new_context);
            bdrv_drained_end(bs);
        }
-
-        bdrv_unref(bs);
    }

    blk->ctx = new_context;
@ -2206,18 +2206,17 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
 }

 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                      int64_t bytes, BdrvRequestFlags flags)
+                                      int bytes, BdrvRequestFlags flags)
 {
    return blk_co_pwritev(blk, offset, bytes, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
 }

 int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
-                          int64_t bytes)
+                          int count)
 {
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-    return blk_pwritev_part(blk, offset, bytes, &qiov, 0,
-                            BDRV_REQ_WRITE_COMPRESSED);
+    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+                   BDRV_REQ_WRITE_COMPRESSED);
 }

 int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
@ -2350,13 +2349,11 @@ void blk_io_limits_disable(BlockBackend *blk)
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    assert(tgm->throttle_state);
    if (bs) {
-        bdrv_ref(bs);
        bdrv_drained_begin(bs);
    }
    throttle_group_unregister_tgm(tgm);
    if (bs) {
        bdrv_drained_end(bs);
-        bdrv_unref(bs);
    }
 }

@ -2447,7 +2444,7 @@ void blk_unregister_buf(BlockBackend *blk, void *host)

 int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
                                   BlockBackend *blk_out, int64_t off_out,
-                                   int64_t bytes, BdrvRequestFlags read_flags,
+                                   int bytes, BdrvRequestFlags read_flags,
                                   BdrvRequestFlags write_flags)
 {
    int r;
--- a/block/block-copy.c
+++ b/block/block-copy.c
@ -21,14 +21,12 @@
 #include "qemu/units.h"
 #include "qemu/coroutine.h"
 #include "block/aio_task.h"
-#include "qemu/error-report.h"

 #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
 #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
 #define BLOCK_COPY_MAX_MEM (128 * MiB)
 #define BLOCK_COPY_MAX_WORKERS 64
 #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
-#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)

 typedef enum {
    COPY_READ_WRITE_CLUSTER,
@ -292,11 +290,9 @@ static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
    }
    QLIST_REMOVE(task, list);
-    if (task->s->progress) {
-        progress_set_remaining(task->s->progress,
-                               bdrv_get_dirty_count(task->s->copy_bitmap) +
-                               task->s->in_flight_bytes);
-    }
+    progress_set_remaining(task->s->progress,
+                           bdrv_get_dirty_count(task->s->copy_bitmap) +
+                           task->s->in_flight_bytes);
    qemu_co_queue_restart_all(&task->wait_queue);
 }

@ -319,14 +315,35 @@ static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
                                     target->bs->bl.max_transfer));
 }

-void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
-                              bool compress)
+BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     int64_t cluster_size, bool use_copy_range,
+                                     BdrvRequestFlags write_flags, Error **errp)
 {
-    /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
-    s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
-        (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    BlockCopyState *s;
+    BdrvDirtyBitmap *copy_bitmap;

-    if (s->max_transfer < s->cluster_size) {
+    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
+                                           errp);
+    if (!copy_bitmap) {
+        return NULL;
+    }
+    bdrv_disable_dirty_bitmap(copy_bitmap);
+
+    s = g_new(BlockCopyState, 1);
+    *s = (BlockCopyState) {
+        .source = source,
+        .target = target,
+        .copy_bitmap = copy_bitmap,
+        .cluster_size = cluster_size,
+        .len = bdrv_dirty_bitmap_size(copy_bitmap),
+        .write_flags = write_flags,
+        .mem = shres_create(BLOCK_COPY_MAX_MEM),
+        .max_transfer = QEMU_ALIGN_DOWN(
+                                    block_copy_max_transfer(source, target),
+                                    cluster_size),
+    };
+
+    if (s->max_transfer < cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than block-copy cluster size, so fallback to
@ -334,7 +351,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
         * behalf).
         */
        s->method = COPY_READ_WRITE_CLUSTER;
-    } else if (compress) {
+    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else {
@ -344,96 +361,6 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
         */
        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
    }
-}
-
-static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
-                                                 Error **errp)
-{
-    int ret;
-    BlockDriverInfo bdi;
-    bool target_does_cow = bdrv_backing_chain_next(target);
-
-    /*
-     * If there is no backing file on the target, we cannot rely on COW if our
-     * backup cluster size is smaller than the target cluster size. Even for
-     * targets with a backing file, try to avoid COW if possible.
-     */
-    ret = bdrv_get_info(target, &bdi);
-    if (ret == -ENOTSUP && !target_does_cow) {
-        /* Cluster size is not defined */
-        warn_report("The target block device doesn't provide "
-                    "information about the block size and it doesn't have a "
-                    "backing file. The default block size of %u bytes is "
-                    "used. If the actual block size of the target exceeds "
-                    "this default, the backup may be unusable",
-                    BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
-    } else if (ret < 0 && !target_does_cow) {
-        error_setg_errno(errp, -ret,
-            "Couldn't determine the cluster size of the target image, "
-            "which has no backing file");
-        error_append_hint(errp,
-            "Aborting, since this may create an unusable destination image\n");
-        return ret;
-    } else if (ret < 0 && target_does_cow) {
-        /* Not fatal; just trudge on ahead. */
-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
-    }
-
-    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-}
-
-BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                     Error **errp)
-{
-    BlockCopyState *s;
-    int64_t cluster_size;
-    BdrvDirtyBitmap *copy_bitmap;
-    bool is_fleecing;
-
-    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
-    if (cluster_size < 0) {
-        return NULL;
-    }
-
-    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
-                                           errp);
-    if (!copy_bitmap) {
-        return NULL;
-    }
-    bdrv_disable_dirty_bitmap(copy_bitmap);
-
-    /*
-     * If source is in backing chain of target assume that target is going to be
-     * used for "image fleecing", i.e. it should represent a kind of snapshot of
-     * source at backup-start point in time. And target is going to be read by
-     * somebody (for example, used as NBD export) during backup job.
-     *
-     * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
-     * intersection of backup writes and third party reads from target,
-     * otherwise reading from target we may occasionally read already updated by
-     * guest data.
-     *
-     * For more information see commit f8d59dfb40bb and test
-     * tests/qemu-iotests/222
-     */
-    is_fleecing = bdrv_chain_contains(target->bs, source->bs);
-
-    s = g_new(BlockCopyState, 1);
-    *s = (BlockCopyState) {
-        .source = source,
-        .target = target,
-        .copy_bitmap = copy_bitmap,
-        .cluster_size = cluster_size,
-        .len = bdrv_dirty_bitmap_size(copy_bitmap),
-        .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
-        .mem = shres_create(BLOCK_COPY_MAX_MEM),
-        .max_transfer = QEMU_ALIGN_DOWN(
-                                    block_copy_max_transfer(source, target),
-                                    cluster_size),
-    };
-
-    block_copy_set_copy_opts(s, false, false);

    ratelimit_init(&s->rate_limit);
    qemu_co_mutex_init(&s->lock);
@ -595,7 +522,7 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
                t->call_state->ret = ret;
                t->call_state->error_is_read = error_is_read;
            }
-        } else if (s->progress) {
+        } else {
            progress_work_done(s->progress, t->bytes);
        }
    }
@ -701,11 +628,9 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
    if (!ret) {
        qemu_co_mutex_lock(&s->lock);
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
-        if (s->progress) {
-            progress_set_remaining(s->progress,
-                                   bdrv_get_dirty_count(s->copy_bitmap) +
-                                   s->in_flight_bytes);
-        }
+        progress_set_remaining(s->progress,
+                               bdrv_get_dirty_count(s->copy_bitmap) +
+                               s->in_flight_bytes);
        qemu_co_mutex_unlock(&s->lock);
    }

@ -1008,11 +933,6 @@ BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
    return s->copy_bitmap;
 }

-int64_t block_copy_cluster_size(BlockCopyState *s)
-{
-    return s->cluster_size;
-}
-
 void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
 {
    qatomic_set(&s->skip_unallocated, skip);
--- a/block/bochs.c
+++ b/block/bochs.c
@ -238,8 +238,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 }

 static int coroutine_fn
-bochs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                QEMUIOVector *qiov, BdrvRequestFlags flags)
+bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                QEMUIOVector *qiov, int flags)
 {
    BDRVBochsState *s = bs->opaque;
    uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
--- a/block/cloop.c
+++ b/block/cloop.c
@ -245,8 +245,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
 }

 static int coroutine_fn
-cloop_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                QEMUIOVector *qiov, BdrvRequestFlags flags)
+cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                QEMUIOVector *qiov, int flags)
 {
    BDRVCloopState *s = bs->opaque;
    uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
--- a/block/commit.c
+++ b/block/commit.c
@ -207,7 +207,7 @@ static const BlockJobDriver commit_job_driver = {
 };

 static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
@ -370,6 +370,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
    s->base = blk_new(s->common.job.aio_context,
                      base_perms,
                      BLK_PERM_CONSISTENT_READ
+                      | BLK_PERM_GRAPH_MOD
                      | BLK_PERM_WRITE_UNCHANGED);
    ret = blk_insert_bs(s->base, base, errp);
    if (ret < 0) {
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@ -1,257 +0,0 @@
-/*
- * copy-before-write filter driver
- *
- * The driver performs Copy-Before-Write (CBW) operation: it is injected above
- * some node, and before each write it copies _old_ data to the target node.
- *
- * Copyright (c) 2018-2021 Virtuozzo International GmbH.
- *
- * Author:
- *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-
-#include "sysemu/block-backend.h"
-#include "qemu/cutils.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "block/qdict.h"
-#include "block/block-copy.h"
-
-#include "block/copy-before-write.h"
-
-typedef struct BDRVCopyBeforeWriteState {
-    BlockCopyState *bcs;
-    BdrvChild *target;
-} BDRVCopyBeforeWriteState;
-
-static coroutine_fn int cbw_co_preadv(
-        BlockDriverState *bs, int64_t offset, int64_t bytes,
-        QEMUIOVector *qiov, BdrvRequestFlags flags)
-{
-    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
-}
-
-static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, BdrvRequestFlags flags)
-{
-    BDRVCopyBeforeWriteState *s = bs->opaque;
-    uint64_t off, end;
-    int64_t cluster_size = block_copy_cluster_size(s->bcs);
-
-    if (flags & BDRV_REQ_WRITE_UNCHANGED) {
-        return 0;
-    }
-
-    off = QEMU_ALIGN_DOWN(offset, cluster_size);
-    end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
-
-    return block_copy(s->bcs, off, end - off, true);
-}
-
-static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes)
-{
-    int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_pdiscard(bs->file, offset, bytes);
-}
-
-static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, BdrvRequestFlags flags)
-{
-    int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
-}
-
-static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
-                                       int64_t offset,
-                                       int64_t bytes,
-                                       QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
-{
-    int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
-}
-
-static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
-{
-    if (!bs->file) {
-        return 0;
-    }
-
-    return bdrv_co_flush(bs->file->bs);
-}
-
-static void cbw_refresh_filename(BlockDriverState *bs)
-{
-    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
-            bs->file->bs->filename);
-}
-
-static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
-                           BdrvChildRole role,
-                           BlockReopenQueue *reopen_queue,
-                           uint64_t perm, uint64_t shared,
-                           uint64_t *nperm, uint64_t *nshared)
-{
-    if (!(role & BDRV_CHILD_FILTERED)) {
-        /*
-         * Target child
-         *
-         * Share write to target (child_file), to not interfere
-         * with guest writes to its disk which may be in target backing chain.
-         * Can't resize during a backup block job because we check the size
-         * only upfront.
-         */
-        *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
-        *nperm = BLK_PERM_WRITE;
-    } else {
-        /* Source child */
-        bdrv_default_perms(bs, c, role, reopen_queue,
-                           perm, shared, nperm, nshared);
-
-        if (!QLIST_EMPTY(&bs->parents)) {
-            if (perm & BLK_PERM_WRITE) {
-                *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-            }
-            *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
-        }
-    }
-}
-
-static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVCopyBeforeWriteState *s = bs->opaque;
-    BdrvDirtyBitmap *copy_bitmap;
-
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
-    }
-
-    s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
-                                BDRV_CHILD_DATA, false, errp);
-    if (!s->target) {
-        return -EINVAL;
-    }
-
-    bs->total_sectors = bs->file->bs->total_sectors;
-    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
-            (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
-    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
-            ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
-             bs->file->bs->supported_zero_flags);
-
-    s->bcs = block_copy_state_new(bs->file, s->target, errp);
-    if (!s->bcs) {
-        error_prepend(errp, "Cannot create block-copy-state: ");
-        return -EINVAL;
-    }
-
-    copy_bitmap = block_copy_dirty_bitmap(s->bcs);
-    bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap));
-
-    return 0;
-}
-
-static void cbw_close(BlockDriverState *bs)
-{
-    BDRVCopyBeforeWriteState *s = bs->opaque;
-
-    block_copy_state_free(s->bcs);
-    s->bcs = NULL;
-}
-
-BlockDriver bdrv_cbw_filter = {
-    .format_name = "copy-before-write",
-    .instance_size = sizeof(BDRVCopyBeforeWriteState),
-
-    .bdrv_open                  = cbw_open,
-    .bdrv_close                 = cbw_close,
-
-    .bdrv_co_preadv             = cbw_co_preadv,
-    .bdrv_co_pwritev            = cbw_co_pwritev,
-    .bdrv_co_pwrite_zeroes      = cbw_co_pwrite_zeroes,
-    .bdrv_co_pdiscard           = cbw_co_pdiscard,
-    .bdrv_co_flush              = cbw_co_flush,
-
-    .bdrv_refresh_filename      = cbw_refresh_filename,
-
-    .bdrv_child_perm            = cbw_child_perm,
-
-    .is_filter = true,
-};
-
-BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                  BlockDriverState *target,
-                                  const char *filter_node_name,
-                                  BlockCopyState **bcs,
-                                  Error **errp)
-{
-    ERRP_GUARD();
-    BDRVCopyBeforeWriteState *state;
-    BlockDriverState *top;
-    QDict *opts;
-
-    assert(source->total_sectors == target->total_sectors);
-
-    opts = qdict_new();
-    qdict_put_str(opts, "driver", "copy-before-write");
-    if (filter_node_name) {
-        qdict_put_str(opts, "node-name", filter_node_name);
-    }
-    qdict_put_str(opts, "file", bdrv_get_node_name(source));
-    qdict_put_str(opts, "target", bdrv_get_node_name(target));
-
-    top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
-    if (!top) {
-        return NULL;
-    }
-
-    state = top->opaque;
-    *bcs = state->bcs;
-
-    return top;
-}
-
-void bdrv_cbw_drop(BlockDriverState *bs)
-{
-    bdrv_drop_filter(bs, &error_abort);
-    bdrv_unref(bs);
-}
-
-static void cbw_init(void)
-{
-    bdrv_register(&bdrv_cbw_filter);
-}
-
-block_init(cbw_init);
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@ -128,10 +128,10 @@ static int64_t cor_getlength(BlockDriverState *bs)


 static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
-                                           int64_t offset, int64_t bytes,
+                                           uint64_t offset, uint64_t bytes,
                                           QEMUIOVector *qiov,
                                           size_t qiov_offset,
-                                           BdrvRequestFlags flags)
+                                           int flags)
 {
    int64_t n;
    int local_flags;
@ -181,11 +181,10 @@ static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,


 static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
-                                            int64_t offset,
-                                            int64_t bytes,
+                                            uint64_t offset,
+                                            uint64_t bytes,
                                            QEMUIOVector *qiov,
-                                            size_t qiov_offset,
-                                            BdrvRequestFlags flags)
+                                            size_t qiov_offset, int flags)
 {
    return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
                                flags);
@ -193,7 +192,7 @@ static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,


 static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes,
+                                             int64_t offset, int bytes,
                                             BdrvRequestFlags flags)
 {
    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
@ -201,15 +200,15 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,


 static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes)
+                                        int64_t offset, int bytes)
 {
    return bdrv_co_pdiscard(bs->file, offset, bytes);
 }


 static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
-                                                  int64_t offset,
-                                                  int64_t bytes,
+                                                  uint64_t offset,
+                                                  uint64_t bytes,
                                                  QEMUIOVector *qiov)
 {
    return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
--- a/block/coroutines.h
+++ b/block/coroutines.h
@ -27,9 +27,6 @@

 #include "block/block_int.h"

-/* For blk_bs() in generated block/block-gen.c */
-#include "sysemu/block-backend.h"
-
 int coroutine_fn bdrv_co_check(BlockDriverState *bs,
                               BdrvCheckResult *res, BdrvCheckMode fix);
 int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
@ -75,34 +72,4 @@ int coroutine_fn
 nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);


-int generated_co_wrapper
-blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
-              QEMUIOVector *qiov, BdrvRequestFlags flags);
-int coroutine_fn
-blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
-                 QEMUIOVector *qiov, BdrvRequestFlags flags);
-
-
-int generated_co_wrapper
-blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
-                    QEMUIOVector *qiov, size_t qiov_offset,
-                    BdrvRequestFlags flags);
-int coroutine_fn
-blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
-                       QEMUIOVector *qiov, size_t qiov_offset,
-                       BdrvRequestFlags flags);
-
-int generated_co_wrapper
-blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-int coroutine_fn
-blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-
-int generated_co_wrapper
-blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-int coroutine_fn
-blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-
-int generated_co_wrapper blk_do_flush(BlockBackend *blk);
-int coroutine_fn blk_co_do_flush(BlockBackend *blk);
-
 #endif /* BLOCK_COROUTINES_INT_H */
--- a/block/crypto.c
+++ b/block/crypto.c
@ -397,8 +397,8 @@ static int block_crypto_reopen_prepare(BDRVReopenState *state,
 #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)

 static coroutine_fn int
-block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                       QEMUIOVector *qiov, BdrvRequestFlags flags)
+block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                       QEMUIOVector *qiov, int flags)
 {
    BlockCrypto *crypto = bs->opaque;
    uint64_t cur_bytes; /* number of bytes in current iteration */
@ -460,8 +460,8 @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,


 static coroutine_fn int
-block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                        QEMUIOVector *qiov, BdrvRequestFlags flags)
+block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                        QEMUIOVector *qiov, int flags)
 {
    BlockCrypto *crypto = bs->opaque;
    uint64_t cur_bytes; /* number of bytes in current iteration */
--- a/block/curl.c
+++ b/block/curl.c
@ -125,7 +125,7 @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque)
    BDRVCURLState *s = socket->s;

    aio_set_fd_handler(s->aio_context, socket->fd, false,
-                       NULL, NULL, NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL);
    return true;
 }

@ -173,20 +173,19 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
    switch (action) {
        case CURL_POLL_IN:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_do, NULL, NULL, NULL, socket);
+                               curl_multi_do, NULL, NULL, socket);
            break;
        case CURL_POLL_OUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, NULL, NULL, socket);
+                               NULL, curl_multi_do, NULL, socket);
            break;
        case CURL_POLL_INOUT:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_do, curl_multi_do,
-                               NULL, NULL, socket);
+                               curl_multi_do, curl_multi_do, NULL, socket);
            break;
        case CURL_POLL_REMOVE:
            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL, NULL, NULL);
+                               NULL, NULL, NULL, NULL);
            break;
    }

@ -499,8 +498,8 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
         * Restricting protocols is only supported from 7.19.4 upwards.
         */
 #if LIBCURL_VERSION_NUM >= 0x071304
-        curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS_STR, PROTOCOLS);
-        curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS_STR, PROTOCOLS);
+        curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
+        curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
 #endif

 #ifdef DEBUG_VERBOSE
@ -769,7 +768,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
    curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
    if (curl_easy_perform(state->curl))
        goto out;
-    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &d)) {
+    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) {
        goto out;
    }
    /* Prior CURL 7.19.4 return value of 0 could mean that the file size is not
@ -897,8 +896,7 @@ out:
 }

 static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
-        BdrvRequestFlags flags)
+        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    CURLAIOCB acb = {
        .co = qemu_coroutine_self(),
--- a/block/dmg.c
+++ b/block/dmg.c
@ -689,8 +689,8 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
 }

 static int coroutine_fn
-dmg_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
-              QEMUIOVector *qiov, BdrvRequestFlags flags)
+dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+              QEMUIOVector *qiov, int flags)
 {
    BDRVDMGState *s = bs->opaque;
    uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@ -31,13 +31,6 @@
 #include <fuse.h>
 #include <fuse_lowlevel.h>

-#if defined(CONFIG_FALLOCATE_ZERO_RANGE)
-#include <linux/falloc.h>
-#endif
-
-#ifdef __linux__
-#include <linux/fs.h>
-#endif

 /* Prevent overly long bounce buffer allocations */
 #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
@ -223,7 +216,7 @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint,

    aio_set_fd_handler(exp->common.ctx,
                       fuse_session_fd(exp->fuse_session), true,
-                       read_from_fuse_export, NULL, NULL, NULL, exp);
+                       read_from_fuse_export, NULL, NULL, exp);
    exp->fd_handler_set_up = true;

    return 0;
@ -267,7 +260,7 @@ static void fuse_export_shutdown(BlockExport *blk_exp)
        if (exp->fd_handler_set_up) {
            aio_set_fd_handler(exp->common.ctx,
                               fuse_session_fd(exp->fuse_session), true,
-                               NULL, NULL, NULL, NULL, NULL);
+                               NULL, NULL, NULL, NULL);
            exp->fd_handler_set_up = false;
        }
    }
@ -625,33 +618,11 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
        return;
    }

-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
    if (mode & FALLOC_FL_KEEP_SIZE) {
        length = MIN(length, blk_len - offset);
    }
-#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */

-    if (!mode) {
-        /* We can only fallocate at the EOF with a truncate */
-        if (offset < blk_len) {
-            fuse_reply_err(req, EOPNOTSUPP);
-            return;
-        }
-
-        if (offset > blk_len) {
-            /* No preallocation needed here */
-            ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
-            if (ret < 0) {
-                fuse_reply_err(req, -ret);
-                return;
-            }
-        }
-
-        ret = fuse_do_truncate(exp, offset + length, true,
-                               PREALLOC_MODE_FALLOC);
-    }
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-    else if (mode & FALLOC_FL_PUNCH_HOLE) {
+    if (mode & FALLOC_FL_PUNCH_HOLE) {
        if (!(mode & FALLOC_FL_KEEP_SIZE)) {
            fuse_reply_err(req, EINVAL);
            return;
@ -665,7 +636,6 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
            length -= size;
        } while (ret == 0 && length > 0);
    }
-#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
 #ifdef CONFIG_FALLOCATE_ZERO_RANGE
    else if (mode & FALLOC_FL_ZERO_RANGE) {
        if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
@ -688,7 +658,25 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
        } while (ret == 0 && length > 0);
    }
 #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
-    else {
+    else if (!mode) {
+        /* We can only fallocate at the EOF with a truncate */
+        if (offset < blk_len) {
+            fuse_reply_err(req, EOPNOTSUPP);
+            return;
+        }
+
+        if (offset > blk_len) {
+            /* No preallocation needed here */
+            ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
+            if (ret < 0) {
+                fuse_reply_err(req, -ret);
+                return;
+            }
+        }
+
+        ret = fuse_do_truncate(exp, offset + length, true,
+                               PREALLOC_MODE_FALLOC);
+    } else {
        ret = -EOPNOTSUPP;
    }

--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@ -172,7 +172,6 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
    return VIRTIO_BLK_S_IOERR;
 }

-/* Called with server refcount increased, must decrease before returning */
 static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
 {
    VuBlkReq *req = opaque;
@ -287,12 +286,10 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
    }

    vu_blk_req_complete(req);
-    vhost_user_server_unref(server);
    return;

 err:
    free(req);
-    vhost_user_server_unref(server);
 }

 static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@ -313,8 +310,6 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)

        Coroutine *co =
            qemu_coroutine_create(vu_blk_virtio_process_req, req);
-
-        vhost_user_server_ref(server);
        qemu_coroutine_enter(co);
    }
 }
--- a/block/file-posix.c
+++ b/block/file-posix.c
@ -106,6 +106,10 @@
 #include <sys/diskslice.h>
 #endif

+#ifdef CONFIG_XFS
+#include <xfs/xfs.h>
+#endif
+
 /* OS X does not have O_DSYNC */
 #ifndef O_DSYNC
 #ifdef O_SYNC
@ -146,12 +150,13 @@ typedef struct BDRVRawState {
    uint64_t locked_perm;
    uint64_t locked_shared_perm;

-    uint64_t aio_max_batch;
-
    int perm_change_fd;
    int perm_change_flags;
    BDRVReopenState *reopen_state;

+#ifdef CONFIG_XFS
+    bool is_xfs:1;
+#endif
    bool has_discard:1;
    bool has_write_zeroes:1;
    bool discard_zeroes:1;
@ -160,7 +165,6 @@ typedef struct BDRVRawState {
    int page_cache_inconsistent; /* errno from fdatasync failure */
    bool has_fallocate;
    bool needs_alignment;
-    bool force_alignment;
    bool drop_cache;
    bool check_cache_dropped;
    struct {
@ -345,17 +349,6 @@ static bool dio_byte_aligned(int fd)
    return false;
 }

-static bool raw_needs_alignment(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) {
-        return true;
-    }
-
-    return s->force_alignment;
-}
-
 /* Check if read is allowed with given memory buffer and length.
 *
 * This function is used to check O_DIRECT memory buffer and request alignment.
@ -402,22 +395,14 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
    if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) {
        bs->bl.request_alignment = 0;
    }
-
-#ifdef __linux__
-    /*
-     * The XFS ioctl definitions are shipped in extra packages that might
-     * not always be available. Since we just need the XFS_IOC_DIOINFO ioctl
-     * here, we simply use our own definition instead:
-     */
-    struct xfs_dioattr {
-        uint32_t d_mem;
-        uint32_t d_miniosz;
-        uint32_t d_maxiosz;
-    } da;
-    if (ioctl(fd, _IOR('X', 30, struct xfs_dioattr), &da) >= 0) {
-        bs->bl.request_alignment = da.d_miniosz;
-        /* The kernel returns wrong information for d_mem */
-        /* s->buf_align = da.d_mem; */
+#ifdef CONFIG_XFS
+    if (s->is_xfs) {
+        struct dioattr da;
+        if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
+            bs->bl.request_alignment = da.d_miniosz;
+            /* The kernel returns wrong information for d_mem */
+            /* s->buf_align = da.d_mem; */
+        }
    }
 #endif

@ -545,11 +530,6 @@ static QemuOptsList raw_runtime_opts = {
            .type = QEMU_OPT_STRING,
            .help = "host AIO implementation (threads, native, io_uring)",
        },
-        {
-            .name = "aio-max-batch",
-            .type = QEMU_OPT_NUMBER,
-            .help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)",
-        },
        {
            .name = "locking",
            .type = QEMU_OPT_STRING,
@ -629,8 +609,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
    s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
 #endif

-    s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0);
-
    locking = qapi_enum_parse(&OnOffAuto_lookup,
                              qemu_opt_get(opts, "locking"),
                              ON_OFF_AUTO_AUTO, &local_err);
@ -741,6 +719,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,

    s->has_discard = true;
    s->has_write_zeroes = true;
+    if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) {
+        s->needs_alignment = true;
+    }

    if (fstat(s->fd, &st) < 0) {
        ret = -errno;
@ -794,10 +775,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
         * so QEMU makes sure all IO operations on the device are aligned
         * to sector size, or else FreeBSD will reject them with EINVAL.
         */
-        s->force_alignment = true;
+        s->needs_alignment = true;
+    }
+#endif
+
+#ifdef CONFIG_XFS
+    if (platform_test_xfs_fd(s->fd)) {
+        s->is_xfs = true;
    }
 #endif
-    s->needs_alignment = raw_needs_alignment(bs);

    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
    if (S_ISREG(st.st_mode)) {
@ -1256,9 +1242,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    BDRVRawState *s = bs->opaque;
    struct stat st;

-    s->needs_alignment = raw_needs_alignment(bs);
    raw_probe_alignment(bs, s->fd, errp);
-
    bs->bl.min_mem_alignment = s->buf_align;
    bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size);

@ -1721,7 +1705,7 @@ static int handle_aiocb_write_zeroes(void *opaque)
             */
            warn_report_once("Your file system is misbehaving: "
                             "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. "
-                             "Please report this bug to your file system "
+                             "Please report this bug to your file sytem "
                             "vendor.");
        } else if (ret != -ENOTSUP) {
            return ret;
@ -2073,8 +2057,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
    } else if (s->use_linux_aio) {
        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
        assert(qiov->size == bytes);
-        return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
-                              s->aio_max_batch);
+        return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
 #endif
    }

@ -2094,16 +2077,16 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
    return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb);
 }

-static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                      uint64_t bytes, QEMUIOVector *qiov,
+                                      int flags)
 {
    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
 }

-static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                       int64_t bytes, QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, QEMUIOVector *qiov,
+                                       int flags)
 {
    assert(flags == 0);
    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
@ -2132,7 +2115,7 @@ static void raw_aio_unplug(BlockDriverState *bs)
 #ifdef CONFIG_LINUX_AIO
    if (s->use_linux_aio) {
        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
-        laio_io_unplug(bs, aio, s->aio_max_batch);
+        laio_io_unplug(bs, aio);
    }
 #endif
 #ifdef CONFIG_LINUX_IO_URING
@ -2761,8 +2744,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
 * the specified offset) that are known to be in the same
 * allocated/unallocated state.
 *
- * 'bytes' is a soft cap for 'pnum'.  If the information is free, 'pnum' may
- * well exceed it.
+ * 'bytes' is the max value 'pnum' should be set to.
 */
 static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
                                            bool want_zero,
@ -2800,7 +2782,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
    } else if (data == offset) {
        /* On a data extent, compute bytes to the end of the extent,
         * possibly including a partial sector at EOF. */
-        *pnum = hole - offset;
+        *pnum = MIN(bytes, hole - offset);

        /*
         * We are not allowed to return partial sectors, though, so
@ -2819,7 +2801,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
    } else {
        /* On a hole, compute bytes to the beginning of the next extent.  */
        assert(hole == offset);
-        *pnum = data - offset;
+        *pnum = MIN(bytes, data - offset);
        ret = BDRV_BLOCK_ZERO;
    }
    *map = offset;
@ -2959,8 +2941,7 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
 }

 static coroutine_fn int
-raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                bool blkdev)
+raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
 {
    BDRVRawState *s = bs->opaque;
    RawPosixAIOData acb;
@ -2984,13 +2965,13 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
 }

 static coroutine_fn int
-raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
    return raw_do_pdiscard(bs, offset, bytes, false);
 }

 static int coroutine_fn
-raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
                     BdrvRequestFlags flags, bool blkdev)
 {
    BDRVRawState *s = bs->opaque;
@ -3058,7 +3039,7 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,

 static int coroutine_fn raw_co_pwrite_zeroes(
    BlockDriverState *bs, int64_t offset,
-    int64_t bytes, BdrvRequestFlags flags)
+    int bytes, BdrvRequestFlags flags)
 {
    return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false);
 }
@ -3221,8 +3202,8 @@ static void raw_abort_perm_update(BlockDriverState *bs)
 }

 static int coroutine_fn raw_co_copy_range_from(
-        BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
-        BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+        BlockDriverState *bs, BdrvChild *src, uint64_t src_offset,
+        BdrvChild *dst, uint64_t dst_offset, uint64_t bytes,
        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags)
 {
    return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
@ -3231,10 +3212,10 @@ static int coroutine_fn raw_co_copy_range_from(

 static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
                                             BdrvChild *src,
-                                             int64_t src_offset,
+                                             uint64_t src_offset,
                                             BdrvChild *dst,
-                                             int64_t dst_offset,
-                                             int64_t bytes,
+                                             uint64_t dst_offset,
+                                             uint64_t bytes,
                                             BdrvRequestFlags read_flags,
                                             BdrvRequestFlags write_flags)
 {
@ -3609,7 +3590,7 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 #endif /* linux */

 static coroutine_fn int
-hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
    BDRVRawState *s = bs->opaque;
    int ret;
@ -3623,7 +3604,7 @@ hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 }

 static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    int rc;

--- a/block/file-win32.c
+++ b/block/file-win32.c
@ -58,10 +58,6 @@ typedef struct BDRVRawState {
    QEMUWin32AIOState *aio;
 } BDRVRawState;

-typedef struct BDRVRawReopenState {
-    HANDLE hfile;
-} BDRVRawReopenState;
-
 /*
 * Read/writes the data to/from a given linear buffer.
 *
@ -396,7 +392,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
    }

    s->hfile = CreateFile(filename, access_flags,
-                          FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+                          FILE_SHARE_READ, NULL,
                          OPEN_EXISTING, overlapped, NULL);
    if (s->hfile == INVALID_HANDLE_VALUE) {
        int err = GetLastError();
@ -440,8 +436,8 @@ fail:
 }

 static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs,
-                                  int64_t offset, int64_t bytes,
-                                  QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                  uint64_t offset, uint64_t bytes,
+                                  QEMUIOVector *qiov, int flags,
                                  BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
@ -455,8 +451,8 @@ static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs,
 }

 static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs,
-                                   int64_t offset, int64_t bytes,
-                                   QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                   uint64_t offset, uint64_t bytes,
+                                   QEMUIOVector *qiov, int flags,
                                   BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
@ -638,97 +634,6 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
    return raw_co_create(&options, errp);
 }

-static int raw_reopen_prepare(BDRVReopenState *state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    BDRVRawState *s = state->bs->opaque;
-    BDRVRawReopenState *rs;
-    int access_flags;
-    DWORD overlapped;
-    int ret = 0;
-
-    if (s->type != FTYPE_FILE) {
-        error_setg(errp, "Can only reopen files");
-        return -EINVAL;
-    }
-
-    rs = g_new0(BDRVRawReopenState, 1);
-
-    /*
-     * We do not support changing any options (only flags). By leaving
-     * all options in state->options, we tell the generic reopen code
-     * that we do not support changing any of them, so it will verify
-     * that their values did not change.
-     */
-
-    raw_parse_flags(state->flags, s->aio != NULL, &access_flags, &overlapped);
-    rs->hfile = CreateFile(state->bs->filename, access_flags,
-                           FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
-                           OPEN_EXISTING, overlapped, NULL);
-
-    if (rs->hfile == INVALID_HANDLE_VALUE) {
-        int err = GetLastError();
-
-        error_setg_win32(errp, err, "Could not reopen '%s'",
-                         state->bs->filename);
-        if (err == ERROR_ACCESS_DENIED) {
-            ret = -EACCES;
-        } else {
-            ret = -EINVAL;
-        }
-        goto fail;
-    }
-
-    if (s->aio) {
-        ret = win32_aio_attach(s->aio, rs->hfile);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not enable AIO");
-            CloseHandle(rs->hfile);
-            goto fail;
-        }
-    }
-
-    state->opaque = rs;
-
-    return 0;
-
-fail:
-    g_free(rs);
-    state->opaque = NULL;
-
-    return ret;
-}
-
-static void raw_reopen_commit(BDRVReopenState *state)
-{
-    BDRVRawState *s = state->bs->opaque;
-    BDRVRawReopenState *rs = state->opaque;
-
-    assert(rs != NULL);
-
-    CloseHandle(s->hfile);
-    s->hfile = rs->hfile;
-
-    g_free(rs);
-    state->opaque = NULL;
-}
-
-static void raw_reopen_abort(BDRVReopenState *state)
-{
-    BDRVRawReopenState *rs = state->opaque;
-
-    if (!rs) {
-        return;
-    }
-
-    if (rs->hfile != INVALID_HANDLE_VALUE) {
-        CloseHandle(rs->hfile);
-    }
-
-    g_free(rs);
-    state->opaque = NULL;
-}
-
 static QemuOptsList raw_create_opts = {
    .name = "raw-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
@ -754,10 +659,6 @@ BlockDriver bdrv_file = {
    .bdrv_co_create_opts = raw_co_create_opts,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,

-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-
    .bdrv_aio_preadv    = raw_aio_preadv,
    .bdrv_aio_pwritev   = raw_aio_pwritev,
    .bdrv_aio_flush     = raw_aio_flush,
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@ -63,10 +63,10 @@ static int64_t compress_getlength(BlockDriverState *bs)


 static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs,
-                                                int64_t offset, int64_t bytes,
+                                                uint64_t offset, uint64_t bytes,
                                                QEMUIOVector *qiov,
                                                size_t qiov_offset,
-                                                BdrvRequestFlags flags)
+                                                int flags)
 {
    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
                               flags);
@ -74,11 +74,10 @@ static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs,


 static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs,
-                                                 int64_t offset,
-                                                 int64_t bytes,
+                                                 uint64_t offset,
+                                                 uint64_t bytes,
                                                 QEMUIOVector *qiov,
-                                                 size_t qiov_offset,
-                                                 BdrvRequestFlags flags)
+                                                 size_t qiov_offset, int flags)
 {
    return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
                                flags | BDRV_REQ_WRITE_COMPRESSED);
@ -86,7 +85,7 @@ static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs,


 static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int64_t bytes,
+                                                  int64_t offset, int bytes,
                                                  BdrvRequestFlags flags)
 {
    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
@ -94,7 +93,7 @@ static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs,


 static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int64_t bytes)
+                                             int64_t offset, int bytes)
 {
    return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
--- a/block/gluster.c
+++ b/block/gluster.c
@ -891,7 +891,6 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
    bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-    bs->bl.max_pdiscard = SIZE_MAX;
 }

 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
@ -1004,19 +1003,19 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
 static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
                                                      int64_t offset,
-                                                      int64_t bytes,
+                                                      int size,
                                                      BdrvRequestFlags flags)
 {
    int ret;
    GlusterAIOCB acb;
    BDRVGlusterState *s = bs->opaque;

-    acb.size = bytes;
+    acb.size = size;
    acb.ret = 0;
    acb.coroutine = qemu_coroutine_self();
    acb.aio_context = bdrv_get_aio_context(bs);

-    ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
+    ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
    if (ret < 0) {
        return -errno;
    }
@ -1298,20 +1297,18 @@ error:

 #ifdef CONFIG_GLUSTERFS_DISCARD
 static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
-                                                 int64_t offset, int64_t bytes)
+                                                 int64_t offset, int size)
 {
    int ret;
    GlusterAIOCB acb;
    BDRVGlusterState *s = bs->opaque;

-    assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
-
    acb.size = 0;
    acb.ret = 0;
    acb.coroutine = qemu_coroutine_self();
    acb.aio_context = bdrv_get_aio_context(bs);

-    ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
+    ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
    if (ret < 0) {
        return -errno;
    }
@ -1464,8 +1461,7 @@ exit:
 * the specified offset) that are known to be in the same
 * allocated/unallocated state.
 *
- * 'bytes' is a soft cap for 'pnum'.  If the information is free, 'pnum' may
- * well exceed it.
+ * 'bytes' is the max value 'pnum' should be set to.
 *
 * (Based on raw_co_block_status() from file-posix.c.)
 */
@ -1481,8 +1477,6 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
    off_t data = 0, hole = 0;
    int ret = -EINVAL;

-    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
-
    if (!s->fd) {
        return ret;
    }
@ -1506,26 +1500,12 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
    } else if (data == offset) {
        /* On a data extent, compute bytes to the end of the extent,
         * possibly including a partial sector at EOF. */
-        *pnum = hole - offset;
-
-        /*
-         * We are not allowed to return partial sectors, though, so
-         * round up if necessary.
-         */
-        if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
-            int64_t file_length = qemu_gluster_getlength(bs);
-            if (file_length > 0) {
-                /* Ignore errors, this is just a safeguard */
-                assert(hole == file_length);
-            }
-            *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
-        }
-
+        *pnum = MIN(bytes, hole - offset);
        ret = BDRV_BLOCK_DATA;
    } else {
        /* On a hole, compute bytes to the beginning of the next extent.  */
        assert(hole == offset);
-        *pnum = data - offset;
+        *pnum = MIN(bytes, data - offset);
        ret = BDRV_BLOCK_ZERO;
    }

--- a/block/io.c
+++ b/block/io.c
@ -957,9 +957,9 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
    return waited;
 }

-int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
-                            QEMUIOVector *qiov, size_t qiov_offset,
-                            Error **errp)
+static int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+                                   QEMUIOVector *qiov, size_t qiov_offset,
+                                   Error **errp)
 {
    /*
     * Check generic offset/bytes correctness
@ -1231,8 +1231,7 @@ out:
 static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
                                            int64_t offset, int64_t bytes,
                                            QEMUIOVector *qiov,
-                                            size_t qiov_offset,
-                                            BdrvRequestFlags flags)
+                                            size_t qiov_offset, int flags)
 {
    BlockDriver *drv = bs->drv;
    int64_t sector_num;
@ -1870,8 +1869,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
    int head = 0;
    int tail = 0;

-    int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
-                                            INT64_MAX);
+    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
    int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                        bs->bl.request_alignment);
    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
@ -1886,9 +1884,6 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
        return -ENOTSUP;
    }

-    /* Invalidate the cached block-status data range if this write overlaps */
-    bdrv_bsc_invalidate_range(bs, offset, bytes);
-
    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
    tail = (offset + bytes) % alignment;
@ -2076,8 +2071,7 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
 */
 static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
-    int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
-    BdrvRequestFlags flags)
+    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
@ -2250,11 +2244,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
        return -ENOMEDIUM;
    }

-    if (flags & BDRV_REQ_ZERO_WRITE) {
-        ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
-    } else {
-        ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
-    }
+    ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
    if (ret < 0) {
        return ret;
    }
@ -2458,69 +2448,9 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;

    if (bs->drv->bdrv_co_block_status) {
-        /*
-         * Use the block-status cache only for protocol nodes: Format
-         * drivers are generally quick to inquire the status, but protocol
-         * drivers often need to get information from outside of qemu, so
-         * we do not have control over the actual implementation.  There
-         * have been cases where inquiring the status took an unreasonably
-         * long time, and we can do nothing in qemu to fix it.
-         * This is especially problematic for images with large data areas,
-         * because finding the few holes in them and giving them special
-         * treatment does not gain much performance.  Therefore, we try to
-         * cache the last-identified data region.
-         *
-         * Second, limiting ourselves to protocol nodes allows us to assume
-         * the block status for data regions to be DATA | OFFSET_VALID, and
-         * that the host offset is the same as the guest offset.
-         *
-         * Note that it is possible that external writers zero parts of
-         * the cached regions without the cache being invalidated, and so
-         * we may report zeroes as data.  This is not catastrophic,
-         * however, because reporting zeroes as data is fine.
-         */
-        if (QLIST_EMPTY(&bs->children) &&
-            bdrv_bsc_is_data(bs, aligned_offset, pnum))
-        {
-            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
-            local_file = bs;
-            local_map = aligned_offset;
-        } else {
-            ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
-                                                aligned_bytes, pnum, &local_map,
-                                                &local_file);
-
-            /*
-             * Note that checking QLIST_EMPTY(&bs->children) is also done when
-             * the cache is queried above.  Technically, we do not need to check
-             * it here; the worst that can happen is that we fill the cache for
-             * non-protocol nodes, and then it is never used.  However, filling
-             * the cache requires an RCU update, so double check here to avoid
-             * such an update if possible.
-             *
-             * Check want_zero, because we only want to update the cache when we
-             * have accurate information about what is zero and what is data.
-             */
-            if (want_zero &&
-                ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
-                QLIST_EMPTY(&bs->children))
-            {
-                /*
-                 * When a protocol driver reports BLOCK_OFFSET_VALID, the
-                 * returned local_map value must be the same as the offset we
-                 * have passed (aligned_offset), and local_bs must be the node
-                 * itself.
-                 * Assert this, because we follow this rule when reading from
-                 * the cache (see the `local_file = bs` and
-                 * `local_map = aligned_offset` assignments above), and the
-                 * result the cache delivers must be the same as the driver
-                 * would deliver.
-                 */
-                assert(local_file == bs);
-                assert(local_map == aligned_offset);
-                bdrv_bsc_fill(bs, aligned_offset, *pnum);
-            }
-        }
+        ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
+                                            aligned_bytes, pnum, &local_map,
+                                            &local_file);
    } else {
        /* Default code for filters */

@ -2822,12 +2752,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
-    int ret;
-
-    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
-    if (ret < 0) {
-        return ret;
-    }
+    int ret = -ENOTSUP;

    if (!drv) {
        return -ENOMEDIUM;
@ -2839,8 +2764,6 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
        ret = drv->bdrv_load_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
-    } else {
-        ret = -ENOTSUP;
    }

    bdrv_dec_in_flight(bs);
@ -2853,12 +2776,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
    BlockDriver *drv = bs->drv;
    BlockDriverState *child_bs = bdrv_primary_bs(bs);
-    int ret;
-
-    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
-    if (ret < 0) {
-        return ret;
-    }
+    int ret = -ENOTSUP;

    if (!drv) {
        return -ENOMEDIUM;
@ -2870,8 +2788,6 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
        ret = drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (child_bs) {
        ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
-    } else {
-        ret = -ENOTSUP;
    }

    bdrv_dec_in_flight(bs);
@ -3061,8 +2977,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                  int64_t bytes)
 {
    BdrvTrackedRequest req;
-    int ret;
-    int64_t max_pdiscard;
+    int max_pdiscard, ret;
    int head, tail, align;
    BlockDriverState *bs = child->bs;

@ -3088,9 +3003,6 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
        return 0;
    }

-    /* Invalidate the cached block-status data range if this discard overlaps */
-    bdrv_bsc_invalidate_range(bs, offset, bytes);
-
    /* Discard is advisory, but some devices track and coalesce
     * unaligned requests, so we must pass everything down rather than
     * round here.  Still, most devices will just silently ignore
@ -3109,7 +3021,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
        goto out;
    }

-    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
+    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

--- a/block/io_uring.c
+++ b/block/io_uring.c
@ -292,14 +292,12 @@ static bool qemu_luring_poll_cb(void *opaque)
 {
    LuringState *s = opaque;

-    return io_uring_cq_ready(&s->ring);
-}
+    if (io_uring_cq_ready(&s->ring)) {
+        luring_process_completions_and_submit(s);
+        return true;
+    }

-static void qemu_luring_poll_ready(void *opaque)
-{
-    LuringState *s = opaque;
-
-    luring_process_completions_and_submit(s);
+    return false;
 }

 static void ioq_init(LuringQueue *io_q)
@ -404,8 +402,8 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,

 void luring_detach_aio_context(LuringState *s, AioContext *old_context)
 {
-    aio_set_fd_handler(old_context, s->ring.ring_fd, false,
-                       NULL, NULL, NULL, NULL, s);
+    aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL,
+                       s);
    qemu_bh_delete(s->completion_bh);
    s->aio_context = NULL;
 }
@ -415,8 +413,7 @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
    aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
-                       qemu_luring_completion_cb, NULL,
-                       qemu_luring_poll_cb, qemu_luring_poll_ready, s);
+                       qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s);
 }

 LuringState *luring_init(Error **errp)
--- a/block/iscsi.c
+++ b/block/iscsi.c
@ -363,7 +363,7 @@ iscsi_set_events(IscsiLun *iscsilun)
                           false,
                           (ev & POLLIN) ? iscsi_process_read : NULL,
                           (ev & POLLOUT) ? iscsi_process_write : NULL,
-                           NULL, NULL,
+                           NULL,
                           iscsilun);
        iscsilun->events = ev;
    }
@ -427,14 +427,14 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 }

-static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes,
+static bool is_byte_request_lun_aligned(int64_t offset, int count,
                                        IscsiLun *iscsilun)
 {
-    if (offset % iscsilun->block_size || bytes % iscsilun->block_size) {
+    if (offset % iscsilun->block_size || count % iscsilun->block_size) {
        error_report("iSCSI misaligned request: "
                     "iscsilun->block_size %u, offset %" PRIi64
-                     ", bytes %" PRIi64,
-                     iscsilun->block_size, offset, bytes);
+                     ", count %d",
+                     iscsilun->block_size, offset, count);
        return false;
    }
    return true;
@ -781,6 +781,9 @@ retry:
        iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
    }

+    if (*pnum > bytes) {
+        *pnum = bytes;
+    }
 out_unlock:
    qemu_mutex_unlock(&iscsilun->mutex);
    g_free(iTask.err_str);
@ -1138,8 +1141,7 @@ iscsi_getlength(BlockDriverState *bs)
 }

 static int
-coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                               int64_t bytes)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
@ -1155,12 +1157,6 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
        return 0;
    }

-    /*
-     * We don't want to overflow list.num which is uint32_t.
-     * We rely on our max_pdiscard.
-     */
-    assert(bytes / iscsilun->block_size <= UINT32_MAX);
-
    list.lba = offset / iscsilun->block_size;
    list.num = bytes / iscsilun->block_size;

@ -1209,12 +1205,12 @@ out_unlock:

 static int
 coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                    int64_t bytes, BdrvRequestFlags flags)
+                                    int bytes, BdrvRequestFlags flags)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    uint64_t lba;
-    uint64_t nb_blocks;
+    uint32_t nb_blocks;
    bool use_16_for_ws = iscsilun->use_16_for_rw;
    int r = 0;

@ -1254,21 +1250,11 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
    if (use_16_for_ws) {
-        /*
-         * iscsi_writesame16_task num_blocks argument is uint32_t. We rely here
-         * on our max_pwrite_zeroes limit.
-         */
-        assert(nb_blocks <= UINT32_MAX);
        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                            0, 0, iscsi_co_generic_cb, &iTask);
    } else {
-        /*
-         * iscsi_writesame10_task num_blocks argument is uint16_t. We rely here
-         * on our max_pwrite_zeroes limit.
-         */
-        assert(nb_blocks <= UINT16_MAX);
        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                            iscsilun->zeroblock, iscsilun->block_size,
                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
@ -1534,7 +1520,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
    IscsiLun *iscsilun = bs->opaque;

    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    iscsilun->events = 0;

    if (iscsilun->nop_timer) {
@ -2078,19 +2064,20 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
    }

    if (iscsilun->lbp.lbpu) {
-        bs->bl.max_pdiscard =
-            MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size,
-                         (uint64_t)UINT32_MAX * iscsilun->block_size);
+        if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
+            bs->bl.max_pdiscard =
+                iscsilun->bl.max_unmap * iscsilun->block_size;
+        }
        bs->bl.pdiscard_alignment =
            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
    } else {
        bs->bl.pdiscard_alignment = iscsilun->block_size;
    }

-    bs->bl.max_pwrite_zeroes =
-        MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size,
-                     max_xfer_len * iscsilun->block_size);
-
+    if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) {
+        bs->bl.max_pwrite_zeroes =
+            iscsilun->bl.max_ws_len * iscsilun->block_size;
+    }
    if (iscsilun->lbp.lbpws) {
        bs->bl.pwrite_zeroes_alignment =
            iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
@ -2185,10 +2172,10 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,

 static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
                                                 BdrvChild *src,
-                                                 int64_t src_offset,
+                                                 uint64_t src_offset,
                                                 BdrvChild *dst,
-                                                 int64_t dst_offset,
-                                                 int64_t bytes,
+                                                 uint64_t dst_offset,
+                                                 uint64_t bytes,
                                                 BdrvRequestFlags read_flags,
                                                 BdrvRequestFlags write_flags)
 {
@ -2326,10 +2313,10 @@ static void iscsi_xcopy_data(struct iscsi_data *data,

 static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
                                               BdrvChild *src,
-                                               int64_t src_offset,
+                                               uint64_t src_offset,
                                               BdrvChild *dst,
-                                               int64_t dst_offset,
-                                               int64_t bytes,
+                                               uint64_t dst_offset,
+                                               uint64_t bytes,
                                               BdrvRequestFlags read_flags,
                                               BdrvRequestFlags write_flags)
 {
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@ -263,15 +263,12 @@ static bool qemu_laio_poll_cb(void *opaque)
    LinuxAioState *s = container_of(e, LinuxAioState, e);
    struct io_event *events;

-    return io_getevents_peek(s->ctx, &events);
-}
-
-static void qemu_laio_poll_ready(EventNotifier *opaque)
-{
-    EventNotifier *e = opaque;
-    LinuxAioState *s = container_of(e, LinuxAioState, e);
+    if (!io_getevents_peek(s->ctx, &events)) {
+        return false;
+    }

    qemu_laio_process_completions_and_submit(s);
+    return true;
 }

 static void ioq_init(LaioQueue *io_q)
@ -337,45 +334,30 @@ static void ioq_submit(LinuxAioState *s)
    }
 }

-static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
-{
-    uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
-
-    /*
-     * AIO context can be shared between multiple block devices, so
-     * `dev_max_batch` allows reducing the batch size for latency-sensitive
-     * devices.
-     */
-    max_batch = MIN_NON_ZERO(dev_max_batch, max_batch);
-
-    /* limit the batch with the number of available events */
-    max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);
-
-    return max_batch;
-}
-
 void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
 {
    s->io_q.plugged++;
 }

-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
-                    uint64_t dev_max_batch)
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
 {
    assert(s->io_q.plugged);
-    if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
-        (--s->io_q.plugged == 0 &&
-         !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
+    if (--s->io_q.plugged == 0 &&
+        !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }
 }

 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, uint64_t dev_max_batch)
+                          int type)
 {
    LinuxAioState *s = laiocb->ctx;
    struct iocb *iocbs = &laiocb->iocb;
    QEMUIOVector *qiov = laiocb->qiov;
+    int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
+
+    /* limit the batch with the number of available events */
+    max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);

    switch (type) {
    case QEMU_AIO_WRITE:
@ -396,7 +378,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
    s->io_q.in_queue++;
    if (!s->io_q.blocked &&
        (!s->io_q.plugged ||
-         s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) {
+         s->io_q.in_queue >= max_batch)) {
        ioq_submit(s);
    }

@ -404,8 +386,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 }

 int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-                                uint64_t offset, QEMUIOVector *qiov, int type,
-                                uint64_t dev_max_batch)
+                                uint64_t offset, QEMUIOVector *qiov, int type)
 {
    int ret;
    struct qemu_laiocb laiocb = {
@ -417,7 +398,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
        .qiov       = qiov,
    };

-    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+    ret = laio_do_submit(fd, &laiocb, offset, type);
    if (ret < 0) {
        return ret;
    }
@ -430,7 +411,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,

 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL);
+    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
    qemu_bh_delete(s->completion_bh);
    s->aio_context = NULL;
 }
@ -441,8 +422,7 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
                           qemu_laio_completion_cb,
-                           qemu_laio_poll_cb,
-                           qemu_laio_poll_ready);
+                           qemu_laio_poll_cb);
 }

 LinuxAioState *laio_init(Error **errp)
--- a/block/meson.build
+++ b/block/meson.build
@ -4,7 +4,7 @@ block_ss.add(files(
  'aio_task.c',
  'amend.c',
  'backup.c',
-  'copy-before-write.c',
+  'backup-top.c',
  'blkdebug.c',
  'blklogwrites.c',
  'blkverify.c',
@ -65,7 +65,7 @@ block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit
 block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
 block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
 block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
-block_ss.add(when: libaio, if_true: files('linux-aio.c'))
+block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
 block_ss.add(when: linux_io_uring, if_true: files('io_uring.c'))

 block_modules = {}
--- a/block/mirror.c
+++ b/block/mirror.c
@ -56,6 +56,7 @@ typedef struct MirrorBlockJob {
    bool zero_target;
    MirrorCopyMode copy_mode;
    BlockdevOnError on_source_error, on_target_error;
+    bool synced;
    /* Set when the target is synced (dirty bitmap is clean, nothing
     * in flight) and the job is running in active mode */
    bool actively_synced;
@ -120,6 +121,7 @@ typedef enum MirrorMethod {
 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
 {
+    s->synced = false;
    s->actively_synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->on_source_error,
@ -158,25 +160,18 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
            if (ranges_overlap(self_start_chunk, self_nb_chunks,
                               op_start_chunk, op_nb_chunks))
            {
-                if (self) {
-                    /*
-                     * If the operation is already (indirectly) waiting for us,
-                     * or will wait for us as soon as it wakes up, then just go
-                     * on (instead of producing a deadlock in the former case).
-                     */
-                    if (op->waiting_for_op) {
-                        continue;
-                    }
-
-                    self->waiting_for_op = op;
+                /*
+                 * If the operation is already (indirectly) waiting for us, or
+                 * will wait for us as soon as it wakes up, then just go on
+                 * (instead of producing a deadlock in the former case).
+                 */
+                if (op->waiting_for_op) {
+                    continue;
                }

+                self->waiting_for_op = op;
                qemu_co_queue_wait(&op->waiting_requests, NULL);
-
-                if (self) {
-                    self->waiting_for_op = NULL;
-                }
-
+                self->waiting_for_op = NULL;
                break;
            }
        }
@ -771,6 +766,13 @@ static int mirror_exit_common(Job *job)
    block_job_remove_all_bdrv(bjob);
    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);

+    /* We just changed the BDS the job BB refers to (with either or both of the
+     * bdrv_replace_node() calls), so switch the BB back so the cleanup does
+     * the right thing. We don't need any permissions any more now. */
+    blk_remove_bs(bjob->blk);
+    blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
+    blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+
    bs_opaque->job = NULL;

    bdrv_drained_end(src);
@ -935,10 +937,12 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
    if (s->bdev_length == 0) {
        /* Transition to the READY state and wait for complete. */
        job_transition_to_ready(&s->common.job);
+        s->synced = true;
        s->actively_synced = true;
-        while (!job_cancel_requested(&s->common.job) && !s->should_complete) {
+        while (!job_is_cancelled(&s->common.job) && !s->should_complete) {
            job_yield(&s->common.job);
        }
+        s->common.job.cancelled = false;
        goto immediate_exit;
    }

@ -999,11 +1003,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)

        job_pause_point(&s->common.job);

-        if (job_is_cancelled(&s->common.job)) {
-            ret = 0;
-            goto immediate_exit;
-        }
-
        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
         * the number of bytes currently being processed; together those are
@ -1030,7 +1029,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
-            if (!job_is_ready(&s->common.job)) {
+            if (!s->synced) {
                if (mirror_flush(s) < 0) {
                    /* Go check s->ret.  */
                    continue;
@ -1041,13 +1040,14 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
                 * the target in a consistent state.
                 */
                job_transition_to_ready(&s->common.job);
+                s->synced = true;
                if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) {
                    s->actively_synced = true;
                }
            }

            should_complete = s->should_complete ||
-                job_cancel_requested(&s->common.job);
+                job_is_cancelled(&s->common.job);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

@ -1077,17 +1077,24 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
+            s->common.job.cancelled = false;
            need_drain = false;
            break;
        }

-        if (job_is_ready(&s->common.job) && !should_complete) {
+        ret = 0;
+
+        if (s->synced && !should_complete) {
            delay_ns = (s->in_flight == 0 &&
                        cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
        }
-        trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
-                                  delay_ns);
+        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
        job_sleep_ns(&s->common.job, delay_ns);
+        if (job_is_cancelled(&s->common.job) &&
+            (!s->synced || s->common.job.force_cancel))
+        {
+            break;
+        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

@ -1097,7 +1104,8 @@ immediate_exit:
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
-        assert(ret < 0 || job_is_cancelled(&s->common.job));
+        assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) &&
+               job_is_cancelled(&s->common.job)));
        assert(need_drain);
        mirror_wait_for_all_io(s);
    }
@ -1120,7 +1128,7 @@ static void mirror_complete(Job *job, Error **errp)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);

-    if (!job_is_ready(job)) {
+    if (!s->synced) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
@ -1139,7 +1147,10 @@ static void mirror_complete(Job *job, Error **errp)
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
        aio_context_acquire(replace_aio_context);

-        /* TODO Translate this into child freeze system. */
+        /* TODO Translate this into permission system. Current definition of
+         * GRAPH_MOD would require to request it for the parents; they might
+         * not even be BlockDriverStates, however, so a BdrvChild can't address
+         * them. May need redefinition of GRAPH_MOD. */
        error_setg(&s->replace_blocker,
                   "block device is in use by block-job-complete");
        bdrv_op_block_all(s->to_replace, s->replace_blocker);
@ -1172,34 +1183,21 @@ static bool mirror_drained_poll(BlockJob *job)
     * from one of our own drain sections, to avoid a deadlock waiting for
     * ourselves.
     */
-    if (!s->common.job.paused && !job_is_cancelled(&job->job) && !s->in_drain) {
+    if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) {
        return true;
    }

    return !!s->in_flight;
 }

-static bool mirror_cancel(Job *job, bool force)
+static void mirror_cancel(Job *job, bool force)
 {
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
    BlockDriverState *target = blk_bs(s->target);

-    /*
-     * Before the job is READY, we treat any cancellation like a
-     * force-cancellation.
-     */
-    force = force || !job_is_ready(job);
-
-    if (force) {
+    if (force || !job_is_ready(job)) {
        bdrv_cancel_in_flight(target);
    }
-    return force;
-}
-
-static bool commit_active_cancel(Job *job, bool force)
-{
-    /* Same as above in mirror_cancel() */
-    return force || !job_is_ready(job);
 }

 static const BlockJobDriver mirror_job_driver = {
@ -1229,7 +1227,6 @@ static const BlockJobDriver commit_active_job_driver = {
        .abort                  = mirror_abort,
        .pause                  = mirror_pause,
        .complete               = mirror_complete,
-        .cancel                 = commit_active_cancel,
    },
    .drained_poll           = mirror_drained_poll,
 };
@ -1398,7 +1395,7 @@ static void coroutine_fn active_write_settle(MirrorOp *op)
 }

 static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
@ -1413,7 +1410,6 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
    bool copy_to_target;

    copy_to_target = s->job->ret >= 0 &&
-                     !job_is_cancelled(&s->job->common.job) &&
                     s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;

    if (copy_to_target) {
@ -1453,7 +1449,7 @@ out:
 }

 static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    MirrorBDSOpaque *s = bs->opaque;
    QEMUIOVector bounce_qiov;
@ -1462,7 +1458,6 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
    bool copy_to_target;

    copy_to_target = s->job->ret >= 0 &&
-                     !job_is_cancelled(&s->job->common.job) &&
                     s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;

    if (copy_to_target) {
@ -1499,14 +1494,14 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
 }

 static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
                                    flags);
 }

 static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
-    int64_t offset, int64_t bytes)
+    int64_t offset, int bytes)
 {
    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
                                    NULL, 0);
@ -1663,7 +1658,7 @@ static BlockJob *mirror_start_job(
    s = block_job_create(job_id, driver, NULL, mirror_top_bs,
                         BLK_PERM_CONSISTENT_READ,
                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                         BLK_PERM_WRITE, speed,
+                         BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
                         creation_flags, cb, opaque, errp);
    if (!s) {
        goto fail;
@ -1707,7 +1702,9 @@ static BlockJob *mirror_start_job(
            target_perms |= BLK_PERM_RESIZE;
        }

-        target_shared_perms |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+        target_shared_perms |= BLK_PERM_CONSISTENT_READ
+                            |  BLK_PERM_WRITE
+                            |  BLK_PERM_GRAPH_MOD;
    } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
        /*
         * We may want to allow this in the future, but it would
@ -1718,6 +1715,10 @@ static BlockJob *mirror_start_job(
        goto fail;
    }

+    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
+        target_perms |= BLK_PERM_GRAPH_MOD;
+    }
+
    s->target = blk_new(s->common.job.aio_context,
                        target_perms, target_shared_perms);
    ret = blk_insert_bs(s->target, target, errp);
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@ -101,7 +101,7 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
        return;
    }

-    opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
+    opts = drive_def(optstr);
    if (!opts)
        return;

@ -251,10 +251,10 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)

    if (!filename) {
        error_setg(&err, QERR_MISSING_PARAMETER, "target");
-        goto end;
+        hmp_handle_error(mon, err);
+        return;
    }
    qmp_drive_mirror(&mirror, &err);
-end:
    hmp_handle_error(mon, err);
 }

@ -281,11 +281,11 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)

    if (!filename) {
        error_setg(&err, QERR_MISSING_PARAMETER, "target");
-        goto end;
+        hmp_handle_error(mon, err);
+        return;
    }

    qmp_drive_backup(&backup, &err);
-end:
    hmp_handle_error(mon, err);
 }

@ -356,7 +356,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
         * will be taken internally. Today it's actually required.
         */
        error_setg(&err, QERR_MISSING_PARAMETER, "snapshot-file");
-        goto end;
+        hmp_handle_error(mon, err);
+        return;
    }

    mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@ -364,7 +365,6 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
                               filename, false, NULL,
                               !!format, format,
                               true, mode, &err);
-end:
    hmp_handle_error(mon, err);
 }

--- a/block/nbd.c
+++ b/block/nbd.c
@ -57,8 +57,7 @@
 typedef struct {
    Coroutine *coroutine;
    uint64_t offset;        /* original offset of the request */
-    bool receiving;         /* sleeping in the yield in nbd_receive_replies */
-    bool reply_possible;    /* reply header not yet received */
+    bool receiving;         /* waiting for connection_co? */
 } NBDClientRequest;

 typedef enum NBDClientState {
@ -74,13 +73,16 @@ typedef struct BDRVNBDState {

    CoMutex send_mutex;
    CoQueue free_sema;
-
-    CoMutex receive_mutex;
+    Coroutine *connection_co;
+    Coroutine *teardown_co;
+    QemuCoSleep reconnect_sleep;
+    bool drained;
+    bool wait_drained_end;
    int in_flight;
    NBDClientState state;
+    bool wait_in_flight;

    QEMUTimer *reconnect_delay_timer;
-    QEMUTimer *open_timer;

    NBDClientRequest requests[MAX_NBD_REQUESTS];
    NBDReply reply;
@ -88,7 +90,6 @@ typedef struct BDRVNBDState {

    /* Connection parameters */
    uint32_t reconnect_delay;
-    uint32_t open_timeout;
    SocketAddress *saddr;
    char *export, *tlscredsid;
    QCryptoTLSCreds *tlscreds;
@ -126,44 +127,33 @@ static bool nbd_client_connected(BDRVNBDState *s)
    return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED;
 }

-static bool nbd_recv_coroutine_wake_one(NBDClientRequest *req)
-{
-    if (req->receiving) {
-        req->receiving = false;
-        aio_co_wake(req->coroutine);
-        return true;
-    }
-
-    return false;
-}
-
-static void nbd_recv_coroutines_wake(BDRVNBDState *s, bool all)
-{
-    int i;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (nbd_recv_coroutine_wake_one(&s->requests[i]) && !all) {
-            return;
-        }
-    }
-}
-
 static void nbd_channel_error(BDRVNBDState *s, int ret)
 {
-    if (nbd_client_connected(s)) {
-        qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
-    }
-
    if (ret == -EIO) {
        if (nbd_client_connected(s)) {
            s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT :
                                            NBD_CLIENT_CONNECTING_NOWAIT;
        }
    } else {
+        if (nbd_client_connected(s)) {
+            qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+        }
        s->state = NBD_CLIENT_QUIT;
    }
+}

-    nbd_recv_coroutines_wake(s, true);
+static void nbd_recv_coroutines_wake_all(BDRVNBDState *s)
+{
+    int i;
+
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        NBDClientRequest *req = &s->requests[i];
+
+        if (req->coroutine && req->receiving) {
+            req->receiving = false;
+            aio_co_wake(req->coroutine);
+        }
+    }
 }

 static void reconnect_delay_timer_del(BDRVNBDState *s)
@ -180,7 +170,6 @@ static void reconnect_delay_timer_cb(void *opaque)

    if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) {
        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
-        nbd_co_establish_connection_cancel(s->conn);
        while (qemu_co_enter_next(&s->free_sema, NULL)) {
            /* Resume all queued requests */
        }
@ -203,47 +192,113 @@ static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
    timer_mod(s->reconnect_delay_timer, expire_time_ns);
 }

+static void nbd_client_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    /* Timer is deleted in nbd_client_co_drain_begin() */
+    assert(!s->reconnect_delay_timer);
+    /*
+     * If reconnect is in progress we may have no ->ioc.  It will be
+     * re-instantiated in the proper aio context once the connection is
+     * reestablished.
+     */
+    if (s->ioc) {
+        qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
+    }
+}
+
+static void nbd_client_attach_aio_context_bh(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    if (s->connection_co) {
+        /*
+         * The node is still drained, so we know the coroutine has yielded in
+         * nbd_read_eof(), the only place where bs->in_flight can reach 0, or
+         * it is entered for the first time. Both places are safe for entering
+         * the coroutine.
+         */
+        qemu_aio_coroutine_enter(bs->aio_context, s->connection_co);
+    }
+    bdrv_dec_in_flight(bs);
+}
+
+static void nbd_client_attach_aio_context(BlockDriverState *bs,
+                                          AioContext *new_context)
+{
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    /*
+     * s->connection_co is either yielded from nbd_receive_reply or from
+     * nbd_co_reconnect_loop()
+     */
+    if (nbd_client_connected(s)) {
+        qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
+    }
+
+    bdrv_inc_in_flight(bs);
+
+    /*
+     * Need to wait here for the BH to run because the BH must run while the
+     * node is still drained.
+     */
+    aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
+}
+
+static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
+{
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    s->drained = true;
+    qemu_co_sleep_wake(&s->reconnect_sleep);
+
+    nbd_co_establish_connection_cancel(s->conn);
+
+    reconnect_delay_timer_del(s);
+
+    if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) {
+        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+        qemu_co_queue_restart_all(&s->free_sema);
+    }
+}
+
+static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs)
+{
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    s->drained = false;
+    if (s->wait_drained_end) {
+        s->wait_drained_end = false;
+        aio_co_wake(s->connection_co);
+    }
+}
+
+
 static void nbd_teardown_connection(BlockDriverState *bs)
 {
    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;

-    assert(!s->in_flight);
-
    if (s->ioc) {
+        /* finish any pending coroutines */
        qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
-        yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
-                                 nbd_yank, s->bs);
-        object_unref(OBJECT(s->ioc));
-        s->ioc = NULL;
    }

    s->state = NBD_CLIENT_QUIT;
-}
-
-static void open_timer_del(BDRVNBDState *s)
-{
-    if (s->open_timer) {
-        timer_free(s->open_timer);
-        s->open_timer = NULL;
+    if (s->connection_co) {
+        qemu_co_sleep_wake(&s->reconnect_sleep);
+        nbd_co_establish_connection_cancel(s->conn);
    }
-}
-
-static void open_timer_cb(void *opaque)
-{
-    BDRVNBDState *s = opaque;
-
-    nbd_co_establish_connection_cancel(s->conn);
-    open_timer_del(s);
-}
-
-static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
-{
-    assert(!s->open_timer);
-    s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
-                                  QEMU_CLOCK_REALTIME,
-                                  SCALE_NS,
-                                  open_timer_cb, s);
-    timer_mod(s->open_timer, expire_time_ns);
+    if (qemu_in_coroutine()) {
+        s->teardown_co = qemu_coroutine_self();
+        /* connection_co resumes us when it terminates */
+        qemu_coroutine_yield();
+        s->teardown_co = NULL;
+    } else {
+        BDRV_POLL_WHILE(bs, s->connection_co);
+    }
+    assert(!s->connection_co);
 }

 static bool nbd_client_connecting(BDRVNBDState *s)
@ -308,11 +363,10 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
 {
    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
    int ret;
-    bool blocking = nbd_client_connecting_wait(s);

    assert(!s->ioc);

-    s->ioc = nbd_co_establish_connection(s->conn, &s->info, blocking, errp);
+    s->ioc = nbd_co_establish_connection(s->conn, &s->info, true, errp);
    if (!s->ioc) {
        return -ECONNREFUSED;
    }
@ -348,22 +402,29 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
    return 0;
 }

-/* called under s->send_mutex */
 static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
 {
-    assert(nbd_client_connecting(s));
-    assert(s->in_flight == 0);
+    if (!nbd_client_connecting(s)) {
+        return;
+    }

-    if (nbd_client_connecting_wait(s) && s->reconnect_delay &&
-        !s->reconnect_delay_timer)
-    {
-        /*
-         * It's first reconnect attempt after switching to
-         * NBD_CLIENT_CONNECTING_WAIT
-         */
-        reconnect_delay_timer_init(s,
-            qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
-            s->reconnect_delay * NANOSECONDS_PER_SECOND);
+    /* Wait for completion of all in-flight requests */
+
+    qemu_co_mutex_lock(&s->send_mutex);
+
+    while (s->in_flight > 0) {
+        qemu_co_mutex_unlock(&s->send_mutex);
+        nbd_recv_coroutines_wake_all(s);
+        s->wait_in_flight = true;
+        qemu_coroutine_yield();
+        s->wait_in_flight = false;
+        qemu_co_mutex_lock(&s->send_mutex);
+    }
+
+    qemu_co_mutex_unlock(&s->send_mutex);
+
+    if (!nbd_client_connecting(s)) {
+        return;
    }

    /*
@ -383,73 +444,135 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
    nbd_co_do_establish_connection(s->bs, NULL);
 }

-static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle)
+static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
 {
-    int ret;
-    uint64_t ind = HANDLE_TO_INDEX(s, handle), ind2;
-    QEMU_LOCK_GUARD(&s->receive_mutex);
+    uint64_t timeout = 1 * NANOSECONDS_PER_SECOND;
+    uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND;

-    while (true) {
-        if (s->reply.handle == handle) {
-            /* We are done */
-            return 0;
+    if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) {
+        reconnect_delay_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+                                   s->reconnect_delay * NANOSECONDS_PER_SECOND);
+    }
+
+    nbd_reconnect_attempt(s);
+
+    while (nbd_client_connecting(s)) {
+        if (s->drained) {
+            bdrv_dec_in_flight(s->bs);
+            s->wait_drained_end = true;
+            while (s->drained) {
+                /*
+                 * We may be entered once from nbd_client_attach_aio_context_bh
+                 * and then again from nbd_client_co_drain_end, hence the loop.
+                 */
+                qemu_coroutine_yield();
+            }
+            bdrv_inc_in_flight(s->bs);
+        } else {
+            qemu_co_sleep_ns_wakeable(&s->reconnect_sleep,
+                                      QEMU_CLOCK_REALTIME, timeout);
+            if (s->drained) {
+                continue;
+            }
+            if (timeout < max_timeout) {
+                timeout *= 2;
+            }
+        }
+
+        nbd_reconnect_attempt(s);
+    }
+
+    reconnect_delay_timer_del(s);
+}
+
+static coroutine_fn void nbd_connection_entry(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+    uint64_t i;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    while (qatomic_load_acquire(&s->state) != NBD_CLIENT_QUIT) {
+        /*
+         * The NBD client can only really be considered idle when it has
+         * yielded from qio_channel_readv_all_eof(), waiting for data. This is
+         * the point where the additional scheduled coroutine entry happens
+         * after nbd_client_attach_aio_context().
+         *
+         * Therefore we keep an additional in_flight reference all the time and
+         * only drop it temporarily here.
+         */
+
+        if (nbd_client_connecting(s)) {
+            nbd_co_reconnect_loop(s);
        }

        if (!nbd_client_connected(s)) {
-            return -EIO;
-        }
-
-        if (s->reply.handle != 0) {
-            /*
-             * Some other request is being handled now. It should already be
-             * woken by whoever set s->reply.handle (or never wait in this
-             * yield). So, we should not wake it here.
-             */
-            ind2 = HANDLE_TO_INDEX(s, s->reply.handle);
-            assert(!s->requests[ind2].receiving);
-
-            s->requests[ind].receiving = true;
-            qemu_co_mutex_unlock(&s->receive_mutex);
-
-            qemu_coroutine_yield();
-            /*
-             * We may be woken for 3 reasons:
-             * 1. From this function, executing in parallel coroutine, when our
-             *    handle is received.
-             * 2. From nbd_channel_error(), when connection is lost.
-             * 3. From nbd_co_receive_one_chunk(), when previous request is
-             *    finished and s->reply.handle set to 0.
-             * Anyway, it's OK to lock the mutex and go to the next iteration.
-             */
-
-            qemu_co_mutex_lock(&s->receive_mutex);
-            assert(!s->requests[ind].receiving);
            continue;
        }

-        /* We are under mutex and handle is 0. We have to do the dirty work. */
        assert(s->reply.handle == 0);
-        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL);
+        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err);
+
+        if (local_err) {
+            trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err));
+            error_free(local_err);
+            local_err = NULL;
+        }
        if (ret <= 0) {
-            ret = ret ? ret : -EIO;
-            nbd_channel_error(s, ret);
-            return ret;
+            nbd_channel_error(s, ret ? ret : -EIO);
+            continue;
        }
-        if (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply) {
+
+        /*
+         * There's no need for a mutex on the receive side, because the
+         * handler acts as a synchronization point and ensures that only
+         * one coroutine is called until the reply finishes.
+         */
+        i = HANDLE_TO_INDEX(s, s->reply.handle);
+        if (i >= MAX_NBD_REQUESTS ||
+            !s->requests[i].coroutine ||
+            !s->requests[i].receiving ||
+            (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
+        {
            nbd_channel_error(s, -EINVAL);
-            return -EINVAL;
+            continue;
        }
-        if (s->reply.handle == handle) {
-            /* We are done */
-            return 0;
-        }
-        ind2 = HANDLE_TO_INDEX(s, s->reply.handle);
-        if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].reply_possible) {
-            nbd_channel_error(s, -EINVAL);
-            return -EINVAL;
-        }
-        nbd_recv_coroutine_wake_one(&s->requests[ind2]);
+
+        /*
+         * We're woken up again by the request itself.  Note that there
+         * is no race between yielding and reentering connection_co.  This
+         * is because:
+         *
+         * - if the request runs on the same AioContext, it is only
+         *   entered after we yield
+         *
+         * - if the request runs on a different AioContext, reentering
+         *   connection_co happens through a bottom half, which can only
+         *   run after we yield.
+         */
+        s->requests[i].receiving = false;
+        aio_co_wake(s->requests[i].coroutine);
+        qemu_coroutine_yield();
    }
+
+    qemu_co_queue_restart_all(&s->free_sema);
+    nbd_recv_coroutines_wake_all(s);
+    bdrv_dec_in_flight(s->bs);
+
+    s->connection_co = NULL;
+    if (s->ioc) {
+        qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
+        yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
+                                 nbd_yank, s->bs);
+        object_unref(OBJECT(s->ioc));
+        s->ioc = NULL;
+    }
+
+    if (s->teardown_co) {
+        aio_co_wake(s->teardown_co);
+    }
+    aio_wait_kick();
 }

 static int nbd_co_send_request(BlockDriverState *bs,
@ -460,17 +583,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
    int rc, i = -1;

    qemu_co_mutex_lock(&s->send_mutex);
-
-    while (s->in_flight == MAX_NBD_REQUESTS ||
-           (!nbd_client_connected(s) && s->in_flight > 0))
-    {
+    while (s->in_flight == MAX_NBD_REQUESTS || nbd_client_connecting_wait(s)) {
        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
    }

-    if (nbd_client_connecting(s)) {
-        nbd_reconnect_attempt(s);
-    }
-
    if (!nbd_client_connected(s)) {
        rc = -EIO;
        goto err;
@ -490,7 +606,6 @@ static int nbd_co_send_request(BlockDriverState *bs,
    s->requests[i].coroutine = qemu_coroutine_self();
    s->requests[i].offset = request->from;
    s->requests[i].receiving = false;
-    s->requests[i].reply_possible = true;

    request->handle = INDEX_TO_HANDLE(s, i);

@ -518,6 +633,10 @@ err:
        if (i != -1) {
            s->requests[i].coroutine = NULL;
            s->in_flight--;
+        }
+        if (s->in_flight == 0 && s->wait_in_flight) {
+            aio_co_wake(s->connection_co);
+        } else {
            qemu_co_queue_next(&s->free_sema);
        }
    }
@ -816,7 +935,10 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
    }
    *request_ret = 0;

-    nbd_receive_replies(s, handle);
+    /* Wait until we're woken up by nbd_connection_entry.  */
+    s->requests[i].receiving = true;
+    qemu_coroutine_yield();
+    assert(!s->requests[i].receiving);
    if (!nbd_client_connected(s)) {
        error_setg(errp, "Connection closed");
        return -EIO;
@ -909,7 +1031,14 @@ static coroutine_fn int nbd_co_receive_one_chunk(
    }
    s->reply.handle = 0;

-    nbd_recv_coroutines_wake(s, false);
+    if (s->connection_co && !s->wait_in_flight) {
+        /*
+         * We must check s->wait_in_flight, because we may have been entered
+         * by nbd_recv_coroutines_wake_all(). In that case we should not wake
+         * connection_co here; it will be woken by the last request.
+         */
+        aio_co_wake(s->connection_co);
+    }

    return ret;
 }
@ -1020,7 +1149,11 @@ break_loop:

    qemu_co_mutex_lock(&s->send_mutex);
    s->in_flight--;
-    qemu_co_queue_next(&s->free_sema);
+    if (s->in_flight == 0 && s->wait_in_flight) {
+        aio_co_wake(s->connection_co);
+    } else {
+        qemu_co_queue_next(&s->free_sema);
+    }
    qemu_co_mutex_unlock(&s->send_mutex);

    return false;
@ -1189,9 +1322,8 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
    return ret ? ret : request_ret;
 }

-static int nbd_client_co_preadv(BlockDriverState *bs, int64_t offset,
-                                int64_t bytes, QEMUIOVector *qiov,
-                                BdrvRequestFlags flags)
+static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    int ret, request_ret;
    Error *local_err = NULL;
@ -1248,9 +1380,8 @@ static int nbd_client_co_preadv(BlockDriverState *bs, int64_t offset,
    return ret ? ret : request_ret;
 }

-static int nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                 int64_t bytes, QEMUIOVector *qiov,
-                                 BdrvRequestFlags flags)
+static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                 uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
    NBDRequest request = {
@ -1274,17 +1405,15 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset,
 }

 static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                       int64_t bytes, BdrvRequestFlags flags)
+                                       int bytes, BdrvRequestFlags flags)
 {
    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
    NBDRequest request = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
-        .len = bytes,  /* .len is uint32_t actually */
+        .len = bytes,
    };

-    assert(bytes <= UINT32_MAX); /* rely on max_pwrite_zeroes */
-
    assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
    if (!(s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
        return -ENOTSUP;
@ -1324,17 +1453,15 @@ static int nbd_client_co_flush(BlockDriverState *bs)
 }

 static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                                  int64_t bytes)
+                                  int bytes)
 {
    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
-        .len = bytes, /* len is uint32_t */
+        .len = bytes,
    };

-    assert(bytes <= UINT32_MAX); /* rely on max_pdiscard */
-
    assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
    if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
        return 0;
@ -1770,15 +1897,6 @@ static QemuOptsList nbd_runtime_opts = {
                    "future requests before a successful reconnect will "
                    "immediately fail. Default 0",
        },
-        {
-            .name = "open-timeout",
-            .type = QEMU_OPT_NUMBER,
-            .help = "In seconds. If zero, the nbd driver tries the connection "
-                    "only once, and fails to open if the connection fails. "
-                    "If non-zero, the nbd driver will repeat connection "
-                    "attempts until successful or until @open-timeout seconds "
-                    "have elapsed. Default 0",
-        },
        { /* end of list */ }
    },
 };
@ -1834,7 +1952,6 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
    }

    s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
-    s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);

    ret = 0;

@ -1852,7 +1969,6 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    s->bs = bs;
    qemu_co_mutex_init(&s->send_mutex);
    qemu_co_queue_init(&s->free_sema);
-    qemu_co_mutex_init(&s->receive_mutex);

    if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) {
        return -EEXIST;
@ -1866,19 +1982,15 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
    s->conn = nbd_client_connection_new(s->saddr, true, s->export,
                                        s->x_dirty_bitmap, s->tlscreds);

-    if (s->open_timeout) {
-        nbd_client_connection_enable_retry(s->conn);
-        open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
-                        s->open_timeout * NANOSECONDS_PER_SECOND);
-    }
-
-    s->state = NBD_CLIENT_CONNECTING_WAIT;
+    /* TODO: Configurable retry-until-timeout behaviour. */
    ret = nbd_do_establish_connection(bs, errp);
    if (ret < 0) {
        goto fail;
    }

-    nbd_client_connection_enable_retry(s->conn);
+    s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
+    bdrv_inc_in_flight(bs);
+    aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co);

    return 0;

@ -2032,8 +2144,6 @@ static void nbd_cancel_in_flight(BlockDriverState *bs)
        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
        qemu_co_queue_restart_all(&s->free_sema);
    }
-
-    nbd_co_establish_connection_cancel(s->conn);
 }

 static BlockDriver bdrv_nbd = {
@ -2054,6 +2164,10 @@ static BlockDriver bdrv_nbd = {
    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_co_truncate           = nbd_co_truncate,
    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
+    .bdrv_co_drain_begin        = nbd_client_co_drain_begin,
+    .bdrv_co_drain_end          = nbd_client_co_drain_end,
    .bdrv_refresh_filename      = nbd_refresh_filename,
    .bdrv_co_block_status       = nbd_client_co_block_status,
    .bdrv_dirname               = nbd_dirname,
@ -2079,6 +2193,10 @@ static BlockDriver bdrv_nbd_tcp = {
    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_co_truncate           = nbd_co_truncate,
    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
+    .bdrv_co_drain_begin        = nbd_client_co_drain_begin,
+    .bdrv_co_drain_end          = nbd_client_co_drain_end,
    .bdrv_refresh_filename      = nbd_refresh_filename,
    .bdrv_co_block_status       = nbd_client_co_block_status,
    .bdrv_dirname               = nbd_dirname,
@ -2104,6 +2222,10 @@ static BlockDriver bdrv_nbd_unix = {
    .bdrv_refresh_limits        = nbd_refresh_limits,
    .bdrv_co_truncate           = nbd_co_truncate,
    .bdrv_getlength             = nbd_getlength,
+    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
+    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
+    .bdrv_co_drain_begin        = nbd_client_co_drain_begin,
+    .bdrv_co_drain_end          = nbd_client_co_drain_end,
    .bdrv_refresh_filename      = nbd_refresh_filename,
    .bdrv_co_block_status       = nbd_client_co_block_status,
    .bdrv_dirname               = nbd_dirname,
--- a/block/nfs.c
+++ b/block/nfs.c
@ -197,7 +197,7 @@ static void nfs_set_events(NFSClient *client)
                           false,
                           (ev & POLLIN) ? nfs_process_read : NULL,
                           (ev & POLLOUT) ? nfs_process_write : NULL,
-                           NULL, NULL, client);
+                           NULL, client);

    }
    client->events = ev;
@ -262,9 +262,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                                     nfs_co_generic_bh_cb, task);
 }

-static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes, QEMUIOVector *iov,
-                                      BdrvRequestFlags flags)
+static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                      uint64_t bytes, QEMUIOVector *iov,
+                                      int flags)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
@ -296,9 +296,9 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, int64_t offset,
    return 0;
 }

-static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                       int64_t bytes, QEMUIOVector *iov,
-                                       BdrvRequestFlags flags)
+static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, QEMUIOVector *iov,
+                                       int flags)
 {
    NFSClient *client = bs->opaque;
    NFSRPC task;
@ -372,7 +372,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
    NFSClient *client = bs->opaque;

    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL, NULL, NULL);
+                       false, NULL, NULL, NULL, NULL);
    client->events = 0;
 }

@ -390,7 +390,7 @@ static void nfs_client_close(NFSClient *client)
    if (client->context) {
        qemu_mutex_lock(&client->mutex);
        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL, NULL, NULL);
+                           false, NULL, NULL, NULL, NULL);
        qemu_mutex_unlock(&client->mutex);
        if (client->fh) {
            nfs_close(client->context, client->fh);
--- a/block/null.c
+++ b/block/null.c
@ -116,9 +116,8 @@ static coroutine_fn int null_co_common(BlockDriverState *bs)
 }

 static coroutine_fn int null_co_preadv(BlockDriverState *bs,
-                                       int64_t offset, int64_t bytes,
-                                       QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+                                       uint64_t offset, uint64_t bytes,
+                                       QEMUIOVector *qiov, int flags)
 {
    BDRVNullState *s = bs->opaque;

@ -130,9 +129,8 @@ static coroutine_fn int null_co_preadv(BlockDriverState *bs,
 }

 static coroutine_fn int null_co_pwritev(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes,
-                                        QEMUIOVector *qiov,
-                                        BdrvRequestFlags flags)
+                                        uint64_t offset, uint64_t bytes,
+                                        QEMUIOVector *qiov, int flags)
 {
    return null_co_common(bs);
 }
@ -189,8 +187,8 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
 }

 static BlockAIOCB *null_aio_preadv(BlockDriverState *bs,
-                                   int64_t offset, int64_t bytes,
-                                   QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                   uint64_t offset, uint64_t bytes,
+                                   QEMUIOVector *qiov, int flags,
                                   BlockCompletionFunc *cb,
                                   void *opaque)
 {
@ -204,8 +202,8 @@ static BlockAIOCB *null_aio_preadv(BlockDriverState *bs,
 }

 static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs,
-                                    int64_t offset, int64_t bytes,
-                                    QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                    uint64_t offset, uint64_t bytes,
+                                    QEMUIOVector *qiov, int flags,
                                    BlockCompletionFunc *cb,
                                    void *opaque)
 {
--- a/block/nvme.c
+++ b/block/nvme.c
@ -176,27 +176,23 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
        return false;
    }
    memset(q->queue, 0, bytes);
-    r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova, errp);
+    r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
    if (r) {
-        error_prepend(errp, "Cannot map queue: ");
+        error_setg(errp, "Cannot map queue");
+        return false;
    }
-    return r == 0;
-}
-
-static void nvme_free_queue(NVMeQueue *q)
-{
-    qemu_vfree(q->queue);
+    return true;
 }

 static void nvme_free_queue_pair(NVMeQueuePair *q)
 {
-    trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq);
+    trace_nvme_free_queue_pair(q->index, q);
    if (q->completion_bh) {
        qemu_bh_delete(q->completion_bh);
    }
-    nvme_free_queue(&q->sq);
-    nvme_free_queue(&q->cq);
    qemu_vfree(q->prp_list_pages);
+    qemu_vfree(q->sq.queue);
+    qemu_vfree(q->cq.queue);
    qemu_mutex_destroy(&q->lock);
    g_free(q);
 }
@ -206,9 +202,8 @@ static void nvme_free_req_queue_cb(void *opaque)
    NVMeQueuePair *q = opaque;

    qemu_mutex_lock(&q->lock);
-    while (q->free_req_head != -1 &&
-           qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
-        /* Retry waiting requests */
+    while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
+        /* Retry all pending requests */
    }
    qemu_mutex_unlock(&q->lock);
 }
@ -225,7 +220,6 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,

    q = g_try_new0(NVMeQueuePair, 1);
    if (!q) {
-        error_setg(errp, "Cannot allocate queue pair");
        return NULL;
    }
    trace_nvme_create_queue_pair(idx, q, size, aio_context,
@ -234,7 +228,6 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
                          qemu_real_host_page_size);
    q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes);
    if (!q->prp_list_pages) {
-        error_setg(errp, "Cannot allocate PRP page list");
        goto fail;
    }
    memset(q->prp_list_pages, 0, bytes);
@ -244,9 +237,8 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
    qemu_co_queue_init(&q->free_req_queue);
    q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
    r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
-                          false, &prp_list_iova, errp);
+                          false, &prp_list_iova);
    if (r) {
-        error_prepend(errp, "Cannot map buffer for DMA: ");
        goto fail;
    }
    q->free_req_head = -1;
@ -520,10 +512,10 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
 {
    BDRVNVMeState *s = bs->opaque;
    bool ret = false;
-    QEMU_AUTO_VFREE union {
+    union {
        NvmeIdCtrl ctrl;
        NvmeIdNs ns;
-    } *id = NULL;
+    } *id;
    NvmeLBAF *lbaf;
    uint16_t oncs;
    int r;
@ -539,9 +531,9 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
        error_setg(errp, "Cannot allocate buffer for identify response");
        goto out;
    }
-    r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova, errp);
+    r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova);
    if (r) {
-        error_prepend(errp, "Cannot map buffer for DMA: ");
+        error_setg(errp, "Cannot map buffer for DMA");
        goto out;
    }

@ -601,12 +593,15 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
    s->blkshift = lbaf->ds;
 out:
    qemu_vfio_dma_unmap(s->vfio, id);
+    qemu_vfree(id);

    return ret;
 }

-static void nvme_poll_queue(NVMeQueuePair *q)
+static bool nvme_poll_queue(NVMeQueuePair *q)
 {
+    bool progress = false;
+
    const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
    NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];

@ -617,23 +612,30 @@ static void nvme_poll_queue(NVMeQueuePair *q)
     * cannot race with itself.
     */
    if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
-        return;
+        return false;
    }

    qemu_mutex_lock(&q->lock);
    while (nvme_process_completion(q)) {
        /* Keep polling */
+        progress = true;
    }
    qemu_mutex_unlock(&q->lock);
+
+    return progress;
 }

-static void nvme_poll_queues(BDRVNVMeState *s)
+static bool nvme_poll_queues(BDRVNVMeState *s)
 {
+    bool progress = false;
    int i;

    for (i = 0; i < s->queue_count; i++) {
-        nvme_poll_queue(s->queues[i]);
+        if (nvme_poll_queue(s->queues[i])) {
+            progress = true;
+        }
    }
+    return progress;
 }

 static void nvme_handle_event(EventNotifier *n)
@ -694,30 +696,8 @@ static bool nvme_poll_cb(void *opaque)
    EventNotifier *e = opaque;
    BDRVNVMeState *s = container_of(e, BDRVNVMeState,
                                    irq_notifier[MSIX_SHARED_IRQ_IDX]);
-    int i;

-    for (i = 0; i < s->queue_count; i++) {
-        NVMeQueuePair *q = s->queues[i];
-        const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
-        NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
-
-        /*
-         * q->lock isn't needed because nvme_process_completion() only runs in
-         * the event loop thread and cannot race with itself.
-         */
-        if ((le16_to_cpu(cqe->status) & 0x1) != q->cq_phase) {
-            return true;
-        }
-    }
-    return false;
-}
-
-static void nvme_poll_ready(EventNotifier *e)
-{
-    BDRVNVMeState *s = container_of(e, BDRVNVMeState,
-                                    irq_notifier[MSIX_SHARED_IRQ_IDX]);
-
-    nvme_poll_queues(s);
+    return nvme_poll_queues(s);
 }

 static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
@ -852,8 +832,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
    }
    aio_set_event_notifier(bdrv_get_aio_context(bs),
                           &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, nvme_handle_event, nvme_poll_cb,
-                           nvme_poll_ready);
+                           false, nvme_handle_event, nvme_poll_cb);

    if (!nvme_identify(bs, namespace, errp)) {
        ret = -EIO;
@ -938,7 +917,7 @@ static void nvme_close(BlockDriverState *bs)
    g_free(s->queues);
    aio_set_event_notifier(bdrv_get_aio_context(bs),
                           &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, NULL, NULL, NULL);
+                           false, NULL, NULL);
    event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
    qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map,
                            0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE);
@ -1038,7 +1017,6 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
    uint64_t *pagelist = req->prp_list_page;
    int i, j, r;
    int entries = 0;
-    Error *local_err = NULL, **errp = NULL;

    assert(qiov->size);
    assert(QEMU_IS_ALIGNED(qiov->size, s->page_size));
@ -1051,7 +1029,7 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
 try_map:
        r = qemu_vfio_dma_map(s->vfio,
                              qiov->iov[i].iov_base,
-                              len, true, &iova, errp);
+                              len, true, &iova);
        if (r == -ENOSPC) {
            /*
             * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA
@ -1086,8 +1064,6 @@ try_map:
                    goto fail;
                }
            }
-            errp = &local_err;
-
            goto try_map;
        }
        if (r) {
@ -1131,9 +1107,6 @@ fail:
     * because they are already mapped before calling this function; for
     * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by
     * calling qemu_vfio_dma_reset_temporary when necessary. */
-    if (local_err) {
-        error_reportf_err(local_err, "Cannot map buffer for DMA: ");
-    }
    return r;
 }

@ -1238,7 +1211,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 {
    BDRVNVMeState *s = bs->opaque;
    int r;
-    QEMU_AUTO_VFREE uint8_t *buf = NULL;
+    uint8_t *buf = NULL;
    QEMUIOVector local_qiov;
    size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size);
    assert(QEMU_IS_ALIGNED(offset, s->page_size));
@ -1265,21 +1238,20 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    if (!r && !is_write) {
        qemu_iovec_from_buf(qiov, 0, buf, bytes);
    }
+    qemu_vfree(buf);
    return r;
 }

 static coroutine_fn int nvme_co_preadv(BlockDriverState *bs,
-                                       int64_t offset, int64_t bytes,
-                                       QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+                                       uint64_t offset, uint64_t bytes,
+                                       QEMUIOVector *qiov, int flags)
 {
    return nvme_co_prw(bs, offset, bytes, qiov, false, flags);
 }

 static coroutine_fn int nvme_co_pwritev(BlockDriverState *bs,
-                                        int64_t offset, int64_t bytes,
-                                        QEMUIOVector *qiov,
-                                        BdrvRequestFlags flags)
+                                        uint64_t offset, uint64_t bytes,
+                                        QEMUIOVector *qiov, int flags)
 {
    return nvme_co_prw(bs, offset, bytes, qiov, true, flags);
 }
@ -1314,29 +1286,19 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)

 static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
                                              int64_t offset,
-                                              int64_t bytes,
+                                              int bytes,
                                              BdrvRequestFlags flags)
 {
    BDRVNVMeState *s = bs->opaque;
    NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
    NVMeRequest *req;
-    uint32_t cdw12;
+
+    uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;

    if (!s->supports_write_zeroes) {
        return -ENOTSUP;
    }

-    if (bytes == 0) {
-        return 0;
-    }
-
-    cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;
-    /*
-     * We should not lose information. pwrite_zeroes_alignment and
-     * max_pwrite_zeroes guarantees it.
-     */
-    assert(((cdw12 + 1) << s->blkshift) == bytes);
-
    NvmeCmd cmd = {
        .opcode = NVME_CMD_WRITE_ZEROES,
        .nsid = cpu_to_le32(s->nsid),
@ -1378,12 +1340,12 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,

 static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
                                         int64_t offset,
-                                         int64_t bytes)
+                                         int bytes)
 {
    BDRVNVMeState *s = bs->opaque;
    NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
    NVMeRequest *req;
-    QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL;
+    NvmeDsmRange *buf;
    QEMUIOVector local_qiov;
    int ret;

@ -1405,14 +1367,6 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,

    assert(s->queue_count > 1);

-    /*
-     * Filling the @buf requires @offset and @bytes to satisfy restrictions
-     * defined in nvme_refresh_limits().
-     */
-    assert(QEMU_IS_ALIGNED(bytes, 1UL << s->blkshift));
-    assert(QEMU_IS_ALIGNED(offset, 1UL << s->blkshift));
-    assert((bytes >> s->blkshift) <= UINT32_MAX);
-
    buf = qemu_try_memalign(s->page_size, s->page_size);
    if (!buf) {
        return -ENOMEM;
@ -1458,6 +1412,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
    trace_nvme_dsm_done(s, offset, bytes, ret);
 out:
    qemu_iovec_destroy(&local_qiov);
+    qemu_vfree(buf);
    return ret;

 }
@ -1507,18 +1462,6 @@ static void nvme_refresh_limits(BlockDriverState *bs, Error **errp)
    bs->bl.opt_mem_alignment = s->page_size;
    bs->bl.request_alignment = s->page_size;
    bs->bl.max_transfer = s->max_transfer;
-
-    /*
-     * Look at nvme_co_pwrite_zeroes: after shift and decrement we should get
-     * at most 0xFFFF
-     */
-    bs->bl.max_pwrite_zeroes = 1ULL << (s->blkshift + 16);
-    bs->bl.pwrite_zeroes_alignment = MAX(bs->bl.request_alignment,
-                                         1UL << s->blkshift);
-
-    bs->bl.max_pdiscard = (uint64_t)UINT32_MAX << s->blkshift;
-    bs->bl.pdiscard_alignment = MAX(bs->bl.request_alignment,
-                                    1UL << s->blkshift);
 }

 static void nvme_detach_aio_context(BlockDriverState *bs)
@ -1534,7 +1477,7 @@ static void nvme_detach_aio_context(BlockDriverState *bs)

    aio_set_event_notifier(bdrv_get_aio_context(bs),
                           &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, NULL, NULL, NULL);
+                           false, NULL, NULL);
 }

 static void nvme_attach_aio_context(BlockDriverState *bs,
@ -1544,8 +1487,7 @@ static void nvme_attach_aio_context(BlockDriverState *bs,

    s->aio_context = new_context;
    aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, nvme_handle_event, nvme_poll_cb,
-                           nvme_poll_ready);
+                           false, nvme_handle_event, nvme_poll_cb);

    for (unsigned i = 0; i < s->queue_count; i++) {
        NVMeQueuePair *q = s->queues[i];
@ -1579,15 +1521,14 @@ static void nvme_aio_unplug(BlockDriverState *bs)
 static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
 {
    int ret;
-    Error *local_err = NULL;
    BDRVNVMeState *s = bs->opaque;

-    ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err);
+    ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL);
    if (ret) {
        /* FIXME: we may run out of IOVA addresses after repeated
         * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
         * doesn't reclaim addresses for fixed mappings. */
-        error_reportf_err(local_err, "nvme_register_buf failed: ");
+        error_report("nvme_register_buf failed: %s", strerror(-ret));
    }
 }

--- a/block/preallocate.c
+++ b/block/preallocate.c
@ -227,15 +227,15 @@ static void preallocate_reopen_abort(BDRVReopenState *state)
 }

 static coroutine_fn int preallocate_co_preadv_part(
-        BlockDriverState *bs, int64_t offset, int64_t bytes,
-        QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
+        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+        QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
                               flags);
 }

 static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
-                                               int64_t offset, int64_t bytes)
+                                               int64_t offset, int bytes)
 {
    return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
@ -337,7 +337,7 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
 }

 static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+        int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    bool want_merge_zero =
        !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
@ -349,11 +349,11 @@ static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
-                                                    int64_t offset,
-                                                    int64_t bytes,
+                                                    uint64_t offset,
+                                                    uint64_t bytes,
                                                    QEMUIOVector *qiov,
                                                    size_t qiov_offset,
-                                                    BdrvRequestFlags flags)
+                                                    int flags)
 {
    handle_write(bs, offset, bytes, false);

--- a/block/qcow.c
+++ b/block/qcow.c
@ -617,9 +617,9 @@ static void qcow_refresh_limits(BlockDriverState *bs, Error **errp)
    bs->bl.request_alignment = BDRV_SECTOR_SIZE;
 }

-static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
-                                       int64_t bytes, QEMUIOVector *qiov,
-                                       BdrvRequestFlags flags)
+static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, QEMUIOVector *qiov,
+                                       int flags)
 {
    BDRVQcowState *s = bs->opaque;
    int offset_in_cluster;
@ -714,9 +714,9 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
    return ret;
 }

-static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
-                                        int64_t bytes, QEMUIOVector *qiov,
-                                        BdrvRequestFlags flags)
+static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                        uint64_t bytes, QEMUIOVector *qiov,
+                                        int flags)
 {
    BDRVQcowState *s = bs->opaque;
    int offset_in_cluster;
@ -1047,8 +1047,8 @@ static int qcow_make_empty(BlockDriverState *bs)
 /* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
 static coroutine_fn int
-qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                           QEMUIOVector *qiov)
+qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+                           uint64_t bytes, QEMUIOVector *qiov)
 {
    BDRVQcowState *s = bs->opaque;
    z_stream strm;
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .2.50
 .1.1