From e0bd0cd43e4105dcb4e7f1849879170ae3d9da78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Tue, 11 May 2021 10:23:52 +0100 Subject: [PATCH 01/13] docs: add table of contents to QAPI references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The QAPI reference docs for the guest agent, storage daemon and QMP are all rather long and hard to navigate unless you already know the name of the command and can do full text search for it. A table of contents in each doc will help people locate stuff much more easily. Reviewed-by: Connor Kuehl Signed-off-by: Daniel P. Berrangé --- docs/interop/qemu-ga-ref.rst | 3 +++ docs/interop/qemu-qmp-ref.rst | 3 +++ docs/interop/qemu-storage-daemon-qmp-ref.rst | 3 +++ 3 files changed, 9 insertions(+) diff --git a/docs/interop/qemu-ga-ref.rst b/docs/interop/qemu-ga-ref.rst index 3f1c4f908f..db1e946124 100644 --- a/docs/interop/qemu-ga-ref.rst +++ b/docs/interop/qemu-ga-ref.rst @@ -10,4 +10,7 @@ QEMU Guest Agent Protocol Reference TODO: display the QEMU version, both here and in our Sphinx manuals more generally. +.. contents:: + :depth: 3 + .. qapi-doc:: qga/qapi-schema.json diff --git a/docs/interop/qemu-qmp-ref.rst b/docs/interop/qemu-qmp-ref.rst index c8abaaf8e3..b5bebf6b9a 100644 --- a/docs/interop/qemu-qmp-ref.rst +++ b/docs/interop/qemu-qmp-ref.rst @@ -10,4 +10,7 @@ QEMU QMP Reference Manual TODO: display the QEMU version, both here and in our Sphinx manuals more generally. +.. contents:: + :depth: 3 + .. qapi-doc:: qapi/qapi-schema.json diff --git a/docs/interop/qemu-storage-daemon-qmp-ref.rst b/docs/interop/qemu-storage-daemon-qmp-ref.rst index caf9dad23a..d0ebb42ebd 100644 --- a/docs/interop/qemu-storage-daemon-qmp-ref.rst +++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst @@ -10,4 +10,7 @@ QEMU Storage Daemon QMP Reference Manual TODO: display the QEMU version, both here and in our Sphinx manuals more generally. +.. contents:: + :depth: 3 + .. qapi-doc:: storage-daemon/qapi/qapi-schema.json From 491024a5b4efcf79ef46ddfd5c02957102d60175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Tue, 23 Feb 2021 15:35:45 +0000 Subject: [PATCH 02/13] docs: document how to pass secret data to QEMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marc-André Lureau Signed-off-by: Daniel P. Berrangé --- docs/system/index.rst | 1 + docs/system/secrets.rst | 162 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 docs/system/secrets.rst diff --git a/docs/system/index.rst b/docs/system/index.rst index b05af716a9..6aa2f8c05c 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -30,6 +30,7 @@ Contents: guest-loader vnc-security tls + secrets gdb managed-startup cpu-hotplug diff --git a/docs/system/secrets.rst b/docs/system/secrets.rst new file mode 100644 index 0000000000..4a177369b6 --- /dev/null +++ b/docs/system/secrets.rst @@ -0,0 +1,162 @@ +.. _secret data: + +Providing secret data to QEMU +----------------------------- + +There are a variety of objects in QEMU which require secret data to be provided +by the administrator or management application. For example, network block +devices often require a password, LUKS block devices require a passphrase to +unlock key material, remote desktop services require an access password. +QEMU has a general purpose mechanism for providing secret data to QEMU in a +secure manner, using the ``secret`` object type. + +At startup this can be done using the ``-object secret,...`` command line +argument. At runtime this can be done using the ``object_add`` QMP / HMP +monitor commands. The examples that follow will illustrate use of ``-object`` +command lines, but they all apply equivalentely in QMP / HMP. When creating +a ``secret`` object it must be given a unique ID string. This ID is then +used to identify the object when configuring the thing which need the data. + + +INSECURE: Passing secrets as clear text inline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**The following should never be done in a production environment or on a +multi-user host. Command line arguments are usually visible in the process +listings and are often collected in log files by system monitoring agents +or bug reporting tools. QMP/HMP commands and their arguments are also often +logged and attached to bug reports. This all risks compromising secrets that +are passed inline.** + +For the convenience of people debugging / developing with QEMU, it is possible +to pass secret data inline on the command line. + +:: + + -object secret,id=secvnc0,data=87539319 + + +Again it is possible to provide the data in base64 encoded format, which is +particularly useful if the data contains binary characters that would clash +with argument parsing. + +:: + + -object secret,id=secvnc0,data=ODc1MzkzMTk=,format=base64 + + +**Note: base64 encoding does not provide any security benefit.** + +Passing secrets as clear text via a file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The simplest approach to providing data securely is to use a file to store +the secret: + +:: + + -object secret,id=secvnc0,file=vnc-password.txt + + +In this example the file ``vnc-password.txt`` contains the plain text secret +data. It is important to note that the contents of the file are treated as an +opaque blob. The entire raw file contents is used as the value, thus it is +important not to mistakenly add any trailing newline character in the file if +this newline is not intended to be part of the secret data. + +In some cases it might be more convenient to pass the secret data in base64 +format and have QEMU decode to get the raw bytes before use: + +:: + + -object secret,id=sec0,file=vnc-password.txt,format=base64 + + +The file should generally be given mode ``0600`` or ``0400`` permissions, and +have its user/group ownership set to the same account that the QEMU process +will be launched under. If using mandatory access control such as SELinux, then +the file should be labelled to only grant access to the specific QEMU process +that needs access. This will prevent other processes/users from compromising the +secret data. + + +Passing secrets as cipher text inline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To address the insecurity of passing secrets inline as clear text, it is +possible to configure a second secret as an AES key to use for decrypting +the data. + +The secret used as the AES key must always be configured using the file based +storage mechanism: + +:: + + -object secret,id=secmaster,file=masterkey.data,format=base64 + + +In this case the ``masterkey.data`` file would be initialized with 32 +cryptographically secure random bytes, which are then base64 encoded. +The contents of this file will by used as an AES-256 key to encrypt the +real secret that can now be safely passed to QEMU inline as cipher text + +:: + + -object secret,id=secvnc0,keyid=secmaster,data=BASE64-CIPHERTEXT,iv=BASE64-IV,format=base64 + + +In this example ``BASE64-CIPHERTEXT`` is the result of AES-256-CBC encrypting +the secret with ``masterkey.data`` and then base64 encoding the ciphertext. +The ``BASE64-IV`` data is 16 random bytes which have been base64 encrypted. +These bytes are used as the initialization vector for the AES-256-CBC value. + +A single master key can be used to encrypt all subsequent secrets, **but it is +critical that a different initialization vector is used for every secret**. + +Passing secrets via the Linux keyring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The earlier mechanisms described are platform agnostic. If using QEMU on a Linux +host, it is further possible to pass secrets to QEMU using the Linux keyring: + +:: + + -object secret_keyring,id=secvnc0,serial=1729 + + +This instructs QEMU to load data from the Linux keyring secret identified by +the serial number ``1729``. It is possible to combine use of the keyring with +other features mentioned earlier such as base64 encoding: + +:: + + -object secret_keyring,id=secvnc0,serial=1729,format=base64 + + +and also encryption with a master key: + +:: + + -object secret_keyring,id=secvnc0,keyid=secmaster,serial=1729,iv=BASE64-IV + + +Best practice +~~~~~~~~~~~~~ + +It is recommended for production deployments to use a master key secret, and +then pass all subsequent inline secrets encrypted with the master key. + +Each QEMU instance must have a distinct master key, and that must be generated +from a cryptographically secure random data source. The master key should be +deleted immediately upon QEMU shutdown. If passing the master key as a file, +the key file must have access control rules applied that restrict access to +just the one QEMU process that is intended to use it. Alternatively the Linux +keyring can be used to pass the master key to QEMU. + +The secrets for individual QEMU device backends must all then be encrypted +with this master key. + +This procedure helps ensure that the individual secrets for QEMU backends will +not be compromised, even if ``-object`` CLI args or ``object_add`` monitor +commands are collected in log files and attached to public bug support tickets. +The only item that needs strongly protecting is the master key file. From 1c45af36e77ca315b33f237786f8a9fda512a8d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Fri, 14 May 2021 18:20:30 +0100 Subject: [PATCH 03/13] docs: document usage of the authorization framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The authorization framework provides a way to control access to network services after a client has been authenticated. This documents how to actually use it. Reviewed-by: Marc-André Lureau Signed-off-by: Daniel P. Berrangé --- docs/system/authz.rst | 263 ++++++++++++++++++++++++++++++++++++++++++ docs/system/index.rst | 1 + 2 files changed, 264 insertions(+) create mode 100644 docs/system/authz.rst diff --git a/docs/system/authz.rst b/docs/system/authz.rst new file mode 100644 index 0000000000..942af39602 --- /dev/null +++ b/docs/system/authz.rst @@ -0,0 +1,263 @@ +.. _client authorization: + +Client authorization +-------------------- + +When configuring a QEMU network backend with either TLS certificates or SASL +authentication, access will be granted if the client successfully proves +their identity. If the authorization identity database is scoped to the QEMU +client this may be sufficient. It is common, however, for the identity database +to be much broader and thus authentication alone does not enable sufficient +access control. In this case QEMU provides a flexible system for enforcing +finer grained authorization on clients post-authentication. + +Identity providers +~~~~~~~~~~~~~~~~~~ + +At the time of writing there are two authentication frameworks used by QEMU +that emit an identity upon completion. + + * TLS x509 certificate distinguished name. + + When configuring the QEMU backend as a network server with TLS, there + are a choice of credentials to use. The most common scenario is to utilize + x509 certificates. The simplest configuration only involves issuing + certificates to the servers, allowing the client to avoid a MITM attack + against their intended server. + + It is possible, however, to enable mutual verification by requiring that + the client provide a certificate to the server to prove its own identity. + This is done by setting the property ``verify-peer=yes`` on the + ``tls-creds-x509`` object, which is in fact the default. + + When peer verification is enabled, client will need to be issued with a + certificate by the same certificate authority as the server. If this is + still not sufficiently strong access control the Distinguished Name of + the certificate can be used as an identity in the QEMU authorization + framework. + + * SASL username. + + When configuring the QEMU backend as a network server with SASL, upon + completion of the SASL authentication mechanism, a username will be + provided. The format of this username will vary depending on the choice + of mechanism configured for SASL. It might be a simple UNIX style user + ``joebloggs``, while if using Kerberos/GSSAPI it can have a realm + attached ``joebloggs@QEMU.ORG``. Whatever format the username is presented + in, it can be used with the QEMU authorization framework. + +Authorization drivers +~~~~~~~~~~~~~~~~~~~~~ + +The QEMU authorization framework is a general purpose design with choice of +user customizable drivers. These are provided as objects that can be +created at startup using the ``-object`` argument, or at runtime using the +``object_add`` monitor command. + +Simple +^^^^^^ + +This authorization driver provides a simple mechanism for granting access +based on an exact match against a single identity. This is useful when it is +known that only a single client is to be allowed access. + +A possible use case would be when configuring QEMU for an incoming live +migration. It is known exactly which source QEMU the migration is expected +to arrive from. The x509 certificate associated with this source QEMU would +thus be used as the identity to match against. Alternatively if the virtual +machine is dedicated to a specific tenant, then the VNC server would be +configured with SASL and the username of only that tenant listed. + +To create an instance of this driver via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-simple", + "id": "authz0", + "props": { + "identity": "fred" + } + } + } + + +Or via the command line + +:: + + -object authz-simple,id=authz0,identity=fred + + +List +^^^^ + +In some network backends it will be desirable to grant access to a range of +clients. This authorization driver provides a list mechanism for granting +access by matching identities against a list of permitted one. Each match +rule has an associated policy and a catch all policy applies if no rule +matches. The match can either be done as an exact string comparison, or can +use the shell-like glob syntax, which allows for use of wildcards. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-list", + "id": "authz0", + "props": { + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" + } + } + } + + +Due to the way this driver requires setting nested properties, creating +it on the command line will require use of the JSON syntax for ``-object``. +In most cases, however, the next driver will be more suitable. + +List file +^^^^^^^^^ + +This is a variant on the previous driver that allows for a more dynamic +access control policy by storing the match rules in a standalone file +that can be reloaded automatically upon change. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-list-file", + "id": "authz0", + "props": { + "filename": "/etc/qemu/myvm-vnc.acl", + "refresh": true + } + } + } + + +If ``refresh`` is ``yes``, inotify is used to monitor for changes +to the file and auto-reload the rules. + +The ``myvm-vnc.acl`` file should contain the match rules in a format that +closely matches the previous driver: + +:: + + { + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" + } + + +The object can be created on the command line using + +:: + + -object authz-list-file,id=authz0,\ + filename=/etc/qemu/myvm-vnc.acl,refresh=on + + +PAM +^^^ + +In some scenarios it might be desirable to integrate with authorization +mechanisms that are implemented outside of QEMU. In order to allow maximum +flexibility, QEMU provides a driver that uses the ``PAM`` framework. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-pam", + "id": "authz0", + "parameters": { + "service": "qemu-vnc-tls" + } + } + } + + +The driver only uses the PAM "account" verification +subsystem. The above config would require a config +file /etc/pam.d/qemu-vnc-tls. For a simple file +lookup it would contain + +:: + + account requisite pam_listfile.so item=user sense=allow \ + file=/etc/qemu/vnc.allow + + +The external file would then contain a list of usernames. +If x509 cert was being used as the username, a suitable +entry would match the distinguished name: + +:: + + CN=laptop.berrange.com,O=Berrange Home,L=London,ST=London,C=GB + + +On the command line it can be created using + +:: + + -object authz-pam,id=authz0,service=qemu-vnc-tls + + +There are a variety of PAM plugins that can be used which are not illustrated +here, and it is possible to implement brand new plugins using the PAM API. + + +Connecting backends +~~~~~~~~~~~~~~~~~~~ + +The authorization driver is created using the ``-object`` argument and then +needs to be associated with a network service. The authorization driver object +will be given a unique ID that needs to be referenced. + +The property to set in the network service will vary depending on the type of +identity to verify. By convention, any network server backend that uses TLS +will provide ``tls-authz`` property, while any server using SASL will provide +a ``sasl-authz`` property. + +Thus an example using SASL and authorization for the VNC server would look +like: + +:: + + $QEMU --object authz-simple,id=authz0,identity=fred \ + --vnc 0.0.0.0:1,sasl,sasl-authz=authz0 + +While to validate both the x509 certificate and SASL username: + +:: + + echo "CN=laptop.qemu.org,O=QEMU Project,L=London,ST=London,C=GB" >> tls.acl + $QEMU --object authz-simple,id=authz0,identity=fred \ + --object authz-list-file,id=authz1,filename=tls.acl \ + --object tls-creds-x509,id=tls0,dir=/etc/qemu/tls,verify-peer=yes \ + --vnc 0.0.0.0:1,sasl,sasl-authz=auth0,tls-creds=tls0,tls-authz=authz1 diff --git a/docs/system/index.rst b/docs/system/index.rst index 6aa2f8c05c..6092eb2d91 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -31,6 +31,7 @@ Contents: vnc-security tls secrets + authz gdb managed-startup cpu-hotplug From e2bf32dfabbfe6aabde4a0400b25b768b4481785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 4 Mar 2021 18:14:26 +0000 Subject: [PATCH 04/13] docs: recommend SCRAM-SHA-256 SASL mech instead of SHA-1 variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SHA-256 variant better meats modern security expectations. Also warn that the password file is storing entries in clear text. Reviewed-by: Marc-André Lureau Signed-off-by: Daniel P. Berrangé --- docs/system/vnc-security.rst | 7 ++++--- qemu.sasl | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/system/vnc-security.rst b/docs/system/vnc-security.rst index 830f6acc73..4c1769eeb8 100644 --- a/docs/system/vnc-security.rst +++ b/docs/system/vnc-security.rst @@ -168,7 +168,7 @@ used is drastically reduced. In fact only the GSSAPI SASL mechanism provides an acceptable level of security by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 mechanism, however, it has multiple serious flaws described in detail in RFC 6331 and thus should -never be used any more. The SCRAM-SHA-1 mechanism provides a simple +never be used any more. The SCRAM-SHA-256 mechanism provides a simple username/password auth facility similar to DIGEST-MD5, but does not support session encryption, so can only be used in combination with TLS. @@ -191,11 +191,12 @@ reasonable configuration is :: - mech_list: scram-sha-1 + mech_list: scram-sha-256 sasldb_path: /etc/qemu/passwd.db The ``saslpasswd2`` program can be used to populate the ``passwd.db`` -file with accounts. +file with accounts. Note that the ``passwd.db`` file stores passwords +in clear text. Other SASL configurations will be left as an exercise for the reader. Note that all mechanisms, except GSSAPI, should be combined with use of diff --git a/qemu.sasl b/qemu.sasl index fb8a92ba58..abdfc686be 100644 --- a/qemu.sasl +++ b/qemu.sasl @@ -19,15 +19,15 @@ mech_list: gssapi # If using TLS with VNC, or a UNIX socket only, it is possible to # enable plugins which don't provide session encryption. The -# 'scram-sha-1' plugin allows plain username/password authentication +# 'scram-sha-256' plugin allows plain username/password authentication # to be performed # -#mech_list: scram-sha-1 +#mech_list: scram-sha-256 # You can also list many mechanisms at once, and the VNC server will # negotiate which to use by considering the list enabled on the VNC # client. -#mech_list: scram-sha-1 gssapi +#mech_list: scram-sha-256 gssapi # Some older builds of MIT kerberos on Linux ignore this option & # instead need KRB5_KTNAME env var. @@ -38,7 +38,8 @@ mech_list: gssapi # mechanism this can be commented out. keytab: /etc/qemu/krb5.tab -# If using scram-sha-1 for username/passwds, then this is the file +# If using scram-sha-256 for username/passwds, then this is the file # containing the passwds. Use 'saslpasswd2 -a qemu [username]' -# to add entries, and 'sasldblistusers2 -f [sasldb_path]' to browse it +# to add entries, and 'sasldblistusers2 -f [sasldb_path]' to browse it. +# Note that this file stores passwords in clear text. #sasldb_path: /etc/qemu/passwd.db From 213de8a2fb12f6962ded0240c900f728a72f8217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 4 Mar 2021 18:15:20 +0000 Subject: [PATCH 05/13] sasl: remove comment about obsolete kerberos versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is not relevant to any OS distro that QEMU currently targets. Reviewed-by: Marc-André Lureau Signed-off-by: Daniel P. Berrangé --- qemu.sasl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/qemu.sasl b/qemu.sasl index abdfc686be..851acc7e8f 100644 --- a/qemu.sasl +++ b/qemu.sasl @@ -29,10 +29,6 @@ mech_list: gssapi # client. #mech_list: scram-sha-256 gssapi -# Some older builds of MIT kerberos on Linux ignore this option & -# instead need KRB5_KTNAME env var. -# For modern Linux, and other OS, this should be sufficient -# # This file needs to be populated with the service principal that # was created on the Kerberos v5 server. If switching to a non-gssapi # mechanism this can be commented out. From 626ff6515d41854dc8a880849ae2744c20a70ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 15 Apr 2021 14:33:51 +0100 Subject: [PATCH 06/13] migration: add trace point when vm_stop_force_state fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a critical failure scenario for migration that is hard to diagnose from existing probes. Most likely it is caused by an error from bdrv_flush(), but we're not logging the errno anywhere, hence this new probe. Reviewed-by: Connor Kuehl Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Daniel P. Berrangé --- migration/migration.c | 1 + migration/trace-events | 1 + 2 files changed, 2 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 4828997f63..4228635d18 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3115,6 +3115,7 @@ static void migration_completion(MigrationState *s) if (!ret) { bool inactivate = !migrate_colo_enabled(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + trace_migration_completion_vm_stop(ret); if (ret >= 0) { ret = migration_maybe_pause(s, ¤t_active_state, MIGRATION_STATUS_DEVICE); diff --git a/migration/trace-events b/migration/trace-events index 860c4f4025..a1c0f034ab 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -149,6 +149,7 @@ migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d" migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi64 migration_completion_file_err(void) "" +migration_completion_vm_stop(int ret) "ret %d" migration_completion_postcopy_end(void) "" migration_completion_postcopy_end_after_complete(void) "" migration_rate_limit_pre(int ms) "%d ms" From 8af3f5c6d66ac203000c2d8ebebd3b751f575008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 15 Apr 2021 14:33:51 +0100 Subject: [PATCH 07/13] softmmu: add trace point when bdrv_flush_all fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VM stop process has to flush outstanding I/O and this is a critical failure scenario that is hard to diagnose. Add a probe point that records the flush return code. Reviewed-by: Connor Kuehl Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Daniel P. Berrangé --- softmmu/cpus.c | 7 ++++++- softmmu/trace-events | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/softmmu/cpus.c b/softmmu/cpus.c index a7ee431187..c3caaeb26e 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -44,6 +44,7 @@ #include "sysemu/whpx.h" #include "hw/boards.h" #include "hw/hw.h" +#include "trace.h" #ifdef CONFIG_LINUX @@ -266,6 +267,7 @@ static int do_vm_stop(RunState state, bool send_stop) bdrv_drain_all(); ret = bdrv_flush_all(); + trace_vm_stop_flush_all(ret); return ret; } @@ -704,12 +706,15 @@ int vm_stop_force_state(RunState state) if (runstate_is_running()) { return vm_stop(state); } else { + int ret; runstate_set(state); bdrv_drain_all(); /* Make sure to return an error if the flush in a previous vm_stop() * failed. */ - return bdrv_flush_all(); + ret = bdrv_flush_all(); + trace_vm_stop_flush_all(ret); + return ret; } } diff --git a/softmmu/trace-events b/softmmu/trace-events index 5262828b8d..d18ac41e4e 100644 --- a/softmmu/trace-events +++ b/softmmu/trace-events @@ -19,6 +19,9 @@ flatview_new(void *view, void *root) "%p (root %p)" flatview_destroy(void *view, void *root) "%p (root %p)" flatview_destroy_rcu(void *view, void *root) "%p (root %p)" +# softmmu.c +vm_stop_flush_all(int ret) "ret %d" + # vl.c vm_state_notify(int running, int reason, const char *reason_str) "running %d reason %d (%s)" load_file(const char *name, const char *path) "name %s location %s" From c7ddc8821d88d958bb6d4ef1279ec3609b17ffda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 15 Apr 2021 14:28:16 +0100 Subject: [PATCH 08/13] block: preserve errno from fdatasync failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fdatasync() fails on a file backend we set a flag that short-circuits any future attempts to call fdatasync(). The first failure returns the true errno, but the later short- circuited calls return a generic EIO. The latter is unhelpful because fdatasync() can return a variety of errnos, including EACCESS. Reviewed-by: Connor Kuehl Signed-off-by: Daniel P. Berrangé --- block/file-posix.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index f37dfc10b3..5ff78ecb34 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -160,7 +160,7 @@ typedef struct BDRVRawState { bool discard_zeroes:1; bool use_linux_aio:1; bool use_linux_io_uring:1; - bool page_cache_inconsistent:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; bool drop_cache; @@ -1333,7 +1333,7 @@ static int handle_aiocb_flush(void *opaque) int ret; if (s->page_cache_inconsistent) { - return -EIO; + return -s->page_cache_inconsistent; } ret = qemu_fdatasync(aiocb->aio_fildes); @@ -1352,7 +1352,7 @@ static int handle_aiocb_flush(void *opaque) * Obviously, this doesn't affect O_DIRECT, which bypasses the page * cache. */ if ((s->open_flags & O_DIRECT) == 0) { - s->page_cache_inconsistent = true; + s->page_cache_inconsistent = errno; } return -errno; } From 60ff2ae2a21ddc11cc7284194a3013ff864ac03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 15 Apr 2021 14:28:16 +0100 Subject: [PATCH 09/13] block: add trace point when fdatasync fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A flush failure is a critical failure scenario for some operations. For example, it will prevent migration from completing, as it will make vm_stop() report an error. Thus it is important to have a trace point present for debugging. Reviewed-by: Connor Kuehl Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Daniel P. Berrangé --- block/file-posix.c | 2 ++ block/trace-events | 1 + 2 files changed, 3 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 5ff78ecb34..4189b2bfa6 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1338,6 +1338,8 @@ static int handle_aiocb_flush(void *opaque) ret = qemu_fdatasync(aiocb->aio_fildes); if (ret == -1) { + trace_file_flush_fdatasync_failed(errno); + /* There is no clear definition of the semantics of a failing fsync(), * so we may have to assume the worst. The sad truth is that this * assumption is correct for Linux. Some pages are now probably marked diff --git a/block/trace-events b/block/trace-events index 574760ba9a..b3d2b1e62c 100644 --- a/block/trace-events +++ b/block/trace-events @@ -206,6 +206,7 @@ file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_of file_FindEjectableOpticalMedia(const char *media) "Matching using %s" file_setup_cdrom(const char *partition) "Using %s as optical disc" file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d" +file_flush_fdatasync_failed(int err) "errno %d" # ssh.c sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)" From 99be1ac366c20992242f6cd0b9458e3ef52a7a70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 15 Apr 2021 14:50:07 +0100 Subject: [PATCH 10/13] block: remove duplicate trace.h include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Connor Kuehl Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Daniel P. Berrangé --- block/file-posix.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 4189b2bfa6..b3fbb9bd63 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -106,8 +106,6 @@ #include #endif -#include "trace.h" - /* OS X does not have O_DSYNC */ #ifndef O_DSYNC #ifdef O_SYNC From 85cd1cc6687e827f3e5e94ad2e13444b75d0c5fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Fri, 30 Apr 2021 12:59:06 +0100 Subject: [PATCH 11/13] migration: use GDateTime for formatting timestamp in snapshot names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GDateTime APIs provided by GLib avoid portability pitfalls, such as some platforms where 'struct timeval.tv_sec' field is still 'long' instead of 'time_t'. When combined with automatic cleanup, GDateTime often results in simpler code too. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Daniel P. Berrangé --- migration/savevm.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 52e2d72e4b..72848b946c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2775,8 +2775,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, QEMUFile *f; int saved_vm_running; uint64_t vm_state_size; - qemu_timeval tv; - struct tm tm; + g_autoptr(GDateTime) now = g_date_time_new_now_local(); AioContext *aio_context; if (migration_is_blocked(errp)) { @@ -2836,9 +2835,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, memset(sn, 0, sizeof(*sn)); /* fill auxiliary fields */ - qemu_gettimeofday(&tv); - sn->date_sec = tv.tv_sec; - sn->date_nsec = tv.tv_usec * 1000; + sn->date_sec = g_date_time_to_unix(now); + sn->date_nsec = g_date_time_get_microsecond(now) * 1000; sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); if (replay_mode != REPLAY_MODE_NONE) { sn->icount = replay_get_current_icount(); @@ -2849,9 +2847,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, if (name) { pstrcpy(sn->name, sizeof(sn->name), name); } else { - /* cast below needed for OpenBSD where tv_sec is still 'long' */ - localtime_r((const time_t *)&tv.tv_sec, &tm); - strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm); + g_autofree char *autoname = g_date_time_format(now, "vm-%Y%m%d%H%M%S"); + pstrcpy(sn->name, sizeof(sn->name), autoname); } /* save the VM state */ From 39683553f9a66b735a003ad43bb4d1460cef4d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Fri, 30 Apr 2021 12:59:06 +0100 Subject: [PATCH 12/13] block: use GDateTime for formatting timestamp when dumping snapshot info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GDateTime APIs provided by GLib avoid portability pitfalls, such as some platforms where 'struct timeval.tv_sec' field is still 'long' instead of 'time_t'. When combined with automatic cleanup, GDateTime often results in simpler code too. Reviewed-by: Max Reitz Signed-off-by: Daniel P. Berrangé --- block/qapi.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index dc69341bfe..cf557e3aea 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -663,10 +663,8 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) { - char date_buf[128], clock_buf[128]; + char clock_buf[128]; char icount_buf[128] = {0}; - struct tm tm; - time_t ti; int64_t secs; char *sizing = NULL; @@ -674,10 +672,9 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) qemu_printf("%-10s%-17s%8s%20s%13s%11s", "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT"); } else { - ti = sn->date_sec; - localtime_r(&ti, &tm); - strftime(date_buf, sizeof(date_buf), - "%Y-%m-%d %H:%M:%S", &tm); + g_autoptr(GDateTime) date = g_date_time_new_from_unix_local(sn->date_sec); + g_autofree char *date_buf = g_date_time_format(date, "%Y-%m-%d %H:%M:%S"); + secs = sn->vm_clock_nsec / 1000000000; snprintf(clock_buf, sizeof(clock_buf), "%02d:%02d:%02d.%03d", From 970bc16f60937bcfd334f14c614bd4407c247961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Fri, 30 Apr 2021 12:59:06 +0100 Subject: [PATCH 13/13] usb/dev-mtp: use GDateTime for formatting timestamp for objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GDateTime APIs provided by GLib avoid portability pitfalls, such as some platforms where 'struct timeval.tv_sec' field is still 'long' instead of 'time_t'. When combined with automatic cleanup, GDateTime often results in simpler code too. Reviewed-by: Gerd Hoffmann Signed-off-by: Daniel P. Berrangé --- hw/usb/dev-mtp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 2a895a73b0..c1d1694fd0 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -772,12 +772,9 @@ static void usb_mtp_add_str(MTPData *data, const char *str) static void usb_mtp_add_time(MTPData *data, time_t time) { - char buf[16]; - struct tm tm; - - gmtime_r(&time, &tm); - strftime(buf, sizeof(buf), "%Y%m%dT%H%M%S", &tm); - usb_mtp_add_str(data, buf); + g_autoptr(GDateTime) then = g_date_time_new_from_unix_utc(time); + g_autofree char *thenstr = g_date_time_format(then, "%Y%m%dT%H%M%S"); + usb_mtp_add_str(data, thenstr); } /* ----------------------------------------------------------------------- */