From 87bf1fe5cbffefe6b7ee13a7015ae285250ad2db Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:14 -0500 Subject: [PATCH 1/7] python/machine: add @sock_dir property Analogous to temp_dir and log_dir, add a sock_dir property that defaults to @temp_dir -- instead of base_temp_dir -- when the user hasn't overridden the sock dir value in the initializer. This gives us a much more unique directory to put sockfiles in by default. Signed-off-by: John Snow Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-2-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/machine/machine.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index a487c39745..b1dd77b538 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -134,8 +134,9 @@ class QEMUMachine: self._qmp_timer = qmp_timer self._name = name or "qemu-%d" % os.getpid() + self._temp_dir: Optional[str] = None self._base_temp_dir = base_temp_dir - self._sock_dir = sock_dir or self._base_temp_dir + self._sock_dir = sock_dir self._log_dir = log_dir if monitor_address is not None: @@ -143,7 +144,7 @@ class QEMUMachine: self._remove_monitor_sockfile = False else: self._monitor_address = os.path.join( - self._sock_dir, f"{self._name}-monitor.sock" + self.sock_dir, f"{self._name}-monitor.sock" ) self._remove_monitor_sockfile = True @@ -163,14 +164,13 @@ class QEMUMachine: self._qmp_set = True # Enable QMP monitor by default. self._qmp_connection: Optional[QEMUMonitorProtocol] = None self._qemu_full_args: Tuple[str, ...] = () - self._temp_dir: Optional[str] = None self._launched = False self._machine: Optional[str] = None self._console_index = 0 self._console_set = False self._console_device_type: Optional[str] = None self._console_address = os.path.join( - self._sock_dir, f"{self._name}-console.sock" + self.sock_dir, f"{self._name}-console.sock" ) self._console_socket: Optional[socket.socket] = None self._remove_files: List[str] = [] @@ -816,6 +816,15 @@ class QEMUMachine: dir=self._base_temp_dir) return self._temp_dir + @property + def sock_dir(self) -> str: + """ + Returns the directory used for sockfiles by this machine. + """ + if self._sock_dir: + return self._sock_dir + return self.temp_dir + @property def log_dir(self) -> str: """ From 6eeb3de7e1aff91ce6e092a39f85946d12664385 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:15 -0500 Subject: [PATCH 2/7] python/machine: remove _remove_monitor_sockfile property It doesn't matter if it was the user or the class itself that specified where the sockfile should be created; the fact is that if we are using a sockfile here, we created it and we can clean it up. Signed-off-by: John Snow Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-3-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/machine/machine.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index b1dd77b538..ea9e07805d 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -141,12 +141,10 @@ class QEMUMachine: if monitor_address is not None: self._monitor_address = monitor_address - self._remove_monitor_sockfile = False else: self._monitor_address = os.path.join( self.sock_dir, f"{self._name}-monitor.sock" ) - self._remove_monitor_sockfile = True self._console_log_path = console_log if self._console_log_path: @@ -315,8 +313,7 @@ class QEMUMachine: self._remove_files.append(self._console_address) if self._qmp_set: - if self._remove_monitor_sockfile: - assert isinstance(self._monitor_address, str) + if isinstance(self._monitor_address, str): self._remove_files.append(self._monitor_address) self._qmp_connection = QEMUMonitorProtocol( self._monitor_address, From 72b17fe715056c96ea73f187ab46721788b3a782 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:16 -0500 Subject: [PATCH 3/7] python/machine: add instance disambiguator to default nickname MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If you create two instances of QEMUMachine(), they'll both create the same nickname by default -- which is not that helpful. Luckily, they'll both create unique temporary directories ... but due to user configuration, they may share logging and sockfile directories, meaning two instances can collide. The Python logging will also be quite confusing, with no differentiation between the two instances. Add an instance disambiguator (The memory address of the instance) to the default nickname to foolproof this in all cases. Signed-off-by: John Snow Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-4-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/machine/machine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index ea9e07805d..ad529fd92a 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -133,7 +133,7 @@ class QEMUMachine: self._wrapper = wrapper self._qmp_timer = qmp_timer - self._name = name or "qemu-%d" % os.getpid() + self._name = name or f"qemu-{os.getpid()}-{id(self):02x}" self._temp_dir: Optional[str] = None self._base_temp_dir = base_temp_dir self._sock_dir = sock_dir From b1ca99199320fcc010f407b84ac00d96e7e4baa1 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:17 -0500 Subject: [PATCH 4/7] python/machine: move more variable initializations to _pre_launch No need to clear them only to set them later. Signed-off-by: John Snow Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-5-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/machine/machine.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index ad529fd92a..f92e73de40 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -327,6 +327,14 @@ class QEMUMachine: self._qemu_log_path = os.path.join(self.log_dir, self._name + ".log") self._qemu_log_file = open(self._qemu_log_path, 'wb') + self._iolog = None + self._qemu_full_args = tuple(chain( + self._wrapper, + [self._binary], + self._base_args, + self._args + )) + def _post_launch(self) -> None: if self._qmp_connection: self._qmp.accept(self._qmp_timer) @@ -390,8 +398,6 @@ class QEMUMachine: if self._launched: raise QEMUMachineError('VM already launched') - self._iolog = None - self._qemu_full_args = () try: self._launch() self._launched = True @@ -410,12 +416,6 @@ class QEMUMachine: Launch the VM and establish a QMP connection """ self._pre_launch() - self._qemu_full_args = tuple( - chain(self._wrapper, - [self._binary], - self._base_args, - self._args) - ) LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args)) # Cleaning up of this subprocess is guaranteed by _do_shutdown. From 1611e6cf4e7163f6102b37010a8b7e7120f468b5 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:18 -0500 Subject: [PATCH 5/7] python/machine: handle "fast" QEMU terminations In the case that the QEMU process actually launches -- but then dies so quickly that we can't establish a QMP connection to it -- QEMUMachine currently calls _post_shutdown() assuming that it never launched the VM process. This isn't true, though: it "merely" may have failed to establish a QMP connection and the process is in the middle of its own exit path. If we don't wait for the subprocess, the caller may get a bogus `None` return for .exitcode(). This behavior was observed from device-crash-test; after the switch to Async QMP, the timings were changed such that it was now seemingly possible to witness the failure of "vm.launch()" *prior* to the exitcode becoming available. The semantic of the `_launched` property is changed in this patch. Instead of representing the condition "launch() executed successfully", it will now represent "has forked a child process successfully". This way, wait() when called in the exit path won't become a no-op. Signed-off-by: John Snow Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-6-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/machine/machine.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index f92e73de40..67ab06ca2b 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -349,9 +349,6 @@ class QEMUMachine: Called to cleanup the VM instance after the process has exited. May also be called after a failed launch. """ - # Comprehensive reset for the failed launch case: - self._early_cleanup() - try: self._close_qmp_connection() except Exception as err: # pylint: disable=broad-except @@ -400,9 +397,16 @@ class QEMUMachine: try: self._launch() - self._launched = True except: - self._post_shutdown() + # We may have launched the process but it may + # have exited before we could connect via QMP. + # Assume the VM didn't launch or is exiting. + # If we don't wait for the process, exitcode() may still be + # 'None' by the time control is ceded back to the caller. + if self._launched: + self.wait() + else: + self._post_shutdown() LOG.debug('Error launching VM') if self._qemu_full_args: @@ -426,6 +430,7 @@ class QEMUMachine: stderr=subprocess.STDOUT, shell=False, close_fds=False) + self._launched = True self._post_launch() def _close_qmp_connection(self) -> None: @@ -457,8 +462,8 @@ class QEMUMachine: """ Perform any cleanup that needs to happen before the VM exits. - May be invoked by both soft and hard shutdown in failover scenarios. - Called additionally by _post_shutdown for comprehensive cleanup. + This method may be called twice upon shutdown, once each by soft + and hard shutdown in failover scenarios. """ # If we keep the console socket open, we may deadlock waiting # for QEMU to exit, while QEMU is waiting for the socket to From 206439cd8937a3dc556537074d5d37e5d74eb0d0 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:19 -0500 Subject: [PATCH 6/7] scripts/device-crash-test: Use a QMP timeout Despite all the previous fixes, it's still possible for device-crash-test to wedge itself in the case that QEMU terminates *so quickly* that it doesn't even begin a connection attempt to our QMP client. Python will just joyfully wait ad infinitum for a connection that will now never arrive. The real fix is to use asyncio to simultaneously poll both the health of the launched process AND the connection attempt. That's quite a bit more invasive than just setting a connection timeout, though. Do the very simplest thing for now. Signed-off-by: John Snow Message-id: 20211118204620.1897674-7-jsnow@redhat.com Signed-off-by: John Snow --- scripts/device-crash-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/device-crash-test b/scripts/device-crash-test index 1c73dac93e..7fbd99158b 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -353,7 +353,7 @@ def checkOneCase(args, testcase): '-device', qemuOptsEscape(device)] cmdline = ' '.join([binary] + args) dbg("will launch QEMU: %s", cmdline) - vm = QEMUMachine(binary=binary, args=args) + vm = QEMUMachine(binary=binary, args=args, qmp_timer=15) exc = None exc_traceback = None From a57cb3e23d5ac918a69d0aab918470ff0b429ff9 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:20 -0500 Subject: [PATCH 7/7] python/aqmp: fix send_fd_scm for python 3.6.x 3.6 doesn't play keepaway with the socket object, so we don't need to go fishing for it on this version. In fact, so long as 'sendmsg' is still available, it's probably preferable to just use that method and only go fishing for forbidden details when we absolutely have to. Reported-by: Thomas Huth Signed-off-by: John Snow Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-8-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/aqmp/qmp_client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/qemu/aqmp/qmp_client.py b/python/qemu/aqmp/qmp_client.py index f987da02eb..8105e29fa8 100644 --- a/python/qemu/aqmp/qmp_client.py +++ b/python/qemu/aqmp/qmp_client.py @@ -639,9 +639,12 @@ class QMPClient(AsyncProtocol[Message], Events): if sock.family != socket.AF_UNIX: raise AQMPError("Sending file descriptors requires a UNIX socket.") - # Void the warranty sticker. - # Access to sendmsg in asyncio is scheduled for removal in Python 3.11. - sock = sock._sock # pylint: disable=protected-access + if not hasattr(sock, 'sendmsg'): + # We need to void the warranty sticker. + # Access to sendmsg is scheduled for removal in Python 3.11. + # Find the real backing socket to use it anyway. + sock = sock._sock # pylint: disable=protected-access + sock.sendmsg( [b' '], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, struct.pack('@i', fd))]