9pfs: introduce macOS host support and cleanup

* Add support for Darwin (a.k.a. macOS) hosts. * Code cleanup (move qemu_dirent_dup() from osdep -> 9p-util). * API doc cleanup (convert Doxygen -> kerneldoc format). -----BEGIN PGP SIGNATURE----- iQJLBAABCgA1FiEEltjREM96+AhPiFkBNMK1h2Wkc5UFAmIl6SUXHHFlbXVfb3Nz QGNydWRlYnl0ZS5jb20ACgkQNMK1h2Wkc5WIAg//fAigCqLTYGL1MzOILppRXfJa 3XBdCogLT8m0y+DFoCXgwQx4VU05xPj69633e6nmm4tuyhqMqiIjVQl8EZfH89wD vK3NlMJxevkK8soiOB91iyUD0LifrsdS7RwLF4XQmQ+tQbu6p+zLBjDBHYQScyUh Tms0YD5A+ubWb5s8fZ0NS+zi3GXUAKhnxtypHQjelsL492uXBOdVs+F3a/gCHzhq SyRJn0rdJ+MWZP4QYTpAeOC1YkYd4D52nuuLE4SY/lf3XdHIcUSVAKdWkEuJUAQC u7miCqmkXAjfiiu4/vIGeL2nSwApF6hNAckIoaH6jt3xZEeINmw612oO6uG5Ra3R AqAT1wNASf4e36Ee26pIwcbHVk0fTBjpmaTmlwb9ts1kSaCW5clopvQSkQuER9cz zqGZqD6uWSkfOFaGuLc6ai4/jkxeaM+7liALcXsm68/AnF/5zWPLsF73OrZDjA1H VXVg43+686HRBaHez1eLGmM4dJxlglgVEO35+rPeuztRtwL81v+RhBDcQWsjsxY1 FO5V9LddpfvXgPNlcFNd3+PQnNzy7t4Oz1GKbRZ/bM9hv5+XiObUq+GuO4FvvrLj tmICJ7RYmV91H7MK5bFG1HA1qkdJkXu1GvW6x/knjnSKo6RhEAudDQZhrdjVGjVD vfDxPB4jHm0tKbIbtuU= =Fikk -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20220307' into staging 9pfs: introduce macOS host support and cleanup * Add support for Darwin (a.k.a. macOS) hosts. * Code cleanup (move qemu_dirent_dup() from osdep -> 9p-util). * API doc cleanup (convert Doxygen -> kerneldoc format). # gpg: Signature made Mon 07 Mar 2022 11:14:45 GMT # gpg: using RSA key 96D8D110CF7AF8084F88590134C2B58765A47395 # gpg: issuer "qemu_oss@crudebyte.com" # gpg: Good signature from "Christian Schoenebeck <qemu_oss@crudebyte.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: ECAB 1A45 4014 1413 BA38 4926 30DB 47C3 A012 D5F4 # Subkey fingerprint: 96D8 D110 CF7A F808 4F88 5901 34C2 B587 65A4 7395 * remotes/cschoenebeck/tags/pull-9p-20220307: fsdev/p9array.h: convert Doxygen -> kerneldoc format 9pfs/coth.h: drop Doxygen format on v9fs_co_run_in_worker() 9pfs/9p-util.h: convert Doxygen -> kerneldoc format 9pfs/9p.c: convert Doxygen -> kerneldoc format 9pfs/codir.c: convert Doxygen -> kerneldoc format 9pfs/9p.h: convert Doxygen -> kerneldoc format 9pfs: drop Doxygen format from qemu_dirent_dup() API comment 9pfs: move qemu_dirent_dup() from osdep -> 9p-util 9p: darwin: meson: Allow VirtFS on Darwin 9p: darwin: Adjust assumption on virtio-9p-test 9p: darwin: Implement compatibility for mknodat 9p: darwin: Compatibility for f/l*xattr 9p: darwin: *xattr_nofollow implementations 9p: darwin: Move XATTR_SIZE_MAX->P9_XATTR_SIZE_MAX 9p: darwin: Ignore O_{NOATIME, DIRECT} 9p: darwin: Handle struct dirent differences 9p: darwin: Handle struct stat(fs) differences 9p: Rename 9p-util -> 9p-util-linux 9p: linux: Fix a couple Linux assumptions Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2022-03-08 09:06:57 +00:00 · 2022-03-08 09:06:57 +00:00 · f45cc81911
commit f45cc81911
parent 99c4a9e68e 35b6466459
19 changed files with 404 additions and 128 deletions
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@ -16,10 +16,17 @@

 #include <dirent.h>
 #include <utime.h>
-#include <sys/vfs.h>
 #include "qemu-fsdev-throttle.h"
 #include "p9array.h"

+#ifdef CONFIG_LINUX
+# include <sys/vfs.h>
+#endif
+#ifdef CONFIG_DARWIN
+# include <sys/param.h>
+# include <sys/mount.h>
+#endif
+
 #define SM_LOCAL_MODE_BITS    0600
 #define SM_LOCAL_DIR_MODE_BITS    0700

--- a/fsdev/meson.build
+++ b/fsdev/meson.build
@ -7,6 +7,7 @@ fsdev_ss.add(when: ['CONFIG_FSDEV_9P'], if_true: files(
  'qemu-fsdev.c',
 ), if_false: files('qemu-fsdev-dummy.c'))
 softmmu_ss.add_all(when: 'CONFIG_LINUX', if_true: fsdev_ss)
+softmmu_ss.add_all(when: 'CONFIG_DARWIN', if_true: fsdev_ss)

 if have_virtfs_proxy_helper
  executable('virtfs-proxy-helper',
--- a/fsdev/p9array.h
+++ b/fsdev/p9array.h
@ -81,11 +81,11 @@
 */

 /**
- * Declares an array type for the passed @a scalar_type.
+ * P9ARRAY_DECLARE_TYPE() - Declares an array type for the passed @scalar_type.
+ *
+ * @scalar_type: type of the individual array elements
 *
 * This is typically used from a shared header file.
- *
- * @param scalar_type - type of the individual array elements
 */
 #define P9ARRAY_DECLARE_TYPE(scalar_type) \
    typedef struct P9Array##scalar_type { \
@ -97,14 +97,14 @@
    void p9array_auto_free_##scalar_type(scalar_type **auto_var); \

 /**
- * Defines an array type for the passed @a scalar_type and appropriate
- * @a scalar_cleanup_func.
+ * P9ARRAY_DEFINE_TYPE() - Defines an array type for the passed @scalar_type
+ * and appropriate @scalar_cleanup_func.
+ *
+ * @scalar_type: type of the individual array elements
+ * @scalar_cleanup_func: appropriate function to free memory dynamically
+ *                       allocated by individual array elements before
 *
 * This is typically used from a C unit file.
- *
- * @param scalar_type - type of the individual array elements
- * @param scalar_cleanup_func - appropriate function to free memory dynamically
- *                              allocated by individual array elements before
 */
 #define P9ARRAY_DEFINE_TYPE(scalar_type, scalar_cleanup_func) \
    void p9array_new_##scalar_type(scalar_type **auto_var, size_t len) \
@ -132,23 +132,27 @@
    } \

 /**
+ * P9ARRAY_REF() - Declare a reference variable for an array.
+ *
+ * @scalar_type: type of the individual array elements
+ *
 * Used to declare a reference variable (unique pointer) for an array. After
 * leaving the scope of the reference variable, the associated array is
 * automatically freed.
- *
- * @param scalar_type - type of the individual array elements
 */
 #define P9ARRAY_REF(scalar_type) \
    __attribute((__cleanup__(p9array_auto_free_##scalar_type))) scalar_type*

 /**
- * Allocates a new array of passed @a scalar_type with @a len number of array
- * elements and assigns the created array to the reference variable
- * @a auto_var.
+ * P9ARRAY_NEW() - Allocate a new array.
 *
- * @param scalar_type - type of the individual array elements
- * @param auto_var - destination reference variable
- * @param len - amount of array elements to be allocated immediately
+ * @scalar_type: type of the individual array elements
+ * @auto_var: destination reference variable
+ * @len: amount of array elements to be allocated immediately
+ *
+ * Allocates a new array of passed @scalar_type with @len number of array
+ * elements and assigns the created array to the reference variable
+ * @auto_var.
 */
 #define P9ARRAY_NEW(scalar_type, auto_var, len) \
    QEMU_BUILD_BUG_MSG( \
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@ -32,10 +32,12 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include <libgen.h>
+#ifdef CONFIG_LINUX
 #include <linux/fs.h>
 #ifdef CONFIG_LINUX_MAGIC_H
 #include <linux/magic.h>
 #endif
+#endif
 #include <sys/ioctl.h>

 #ifndef XFS_SUPER_MAGIC
@ -560,6 +562,15 @@ again:
    if (!entry) {
        return NULL;
    }
+#ifdef CONFIG_DARWIN
+    int off;
+    off = telldir(fs->dir.stream);
+    /* If telldir fails, fail the entire readdir call */
+    if (off < 0) {
+        return NULL;
+    }
+    entry->d_seekoff = off;
+#endif

    if (ctx->export_flags & V9FS_SM_MAPPED) {
        entry->d_type = DT_UNKNOWN;
@ -671,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,

    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
+        err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
        if (err == -1) {
            goto out;
        }
@ -686,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
        }
    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
               fs_ctx->export_flags & V9FS_SM_NONE) {
-        err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
+        err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
        if (err == -1) {
            goto out;
        }
@ -779,16 +790,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
        mode_t tmp_mode;
        dev_t tmp_dev;

-        if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+        if (qemu_fgetxattr(fd, "user.virtfs.uid",
+                           &tmp_uid, sizeof(uid_t)) > 0) {
            stbuf->st_uid = le32_to_cpu(tmp_uid);
        }
-        if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+        if (qemu_fgetxattr(fd, "user.virtfs.gid",
+                           &tmp_gid, sizeof(gid_t)) > 0) {
            stbuf->st_gid = le32_to_cpu(tmp_gid);
        }
-        if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) {
+        if (qemu_fgetxattr(fd, "user.virtfs.mode",
+                           &tmp_mode, sizeof(mode_t)) > 0) {
            stbuf->st_mode = le32_to_cpu(tmp_mode);
        }
-        if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+        if (qemu_fgetxattr(fd, "user.virtfs.rdev",
+                           &tmp_dev, sizeof(dev_t)) > 0) {
            stbuf->st_rdev = le64_to_cpu(tmp_dev);
        }
    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@ -123,10 +123,16 @@ static void prstatfs_to_statfs(struct statfs *stfs, ProxyStatFS *prstfs)
    stfs->f_bavail = prstfs->f_bavail;
    stfs->f_files = prstfs->f_files;
    stfs->f_ffree = prstfs->f_ffree;
+#ifdef CONFIG_DARWIN
+    /* f_namelen and f_frsize do not exist on Darwin */
+    stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU;
+    stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU;
+#else
    stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU;
    stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU;
    stfs->f_namelen = prstfs->f_namelen;
    stfs->f_frsize = prstfs->f_frsize;
+#endif
 }

 /* Converts proxy_stat structure to VFS stat structure */
@ -143,12 +149,24 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat *prstat)
   stbuf->st_size = prstat->st_size;
   stbuf->st_blksize = prstat->st_blksize;
   stbuf->st_blocks = prstat->st_blocks;
-   stbuf->st_atim.tv_sec = prstat->st_atim_sec;
-   stbuf->st_atim.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_atime = prstat->st_atim_sec;
   stbuf->st_mtime = prstat->st_mtim_sec;
-   stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec;
   stbuf->st_ctime = prstat->st_ctim_sec;
+#ifdef CONFIG_DARWIN
+   stbuf->st_atimespec.tv_sec = prstat->st_atim_sec;
+   stbuf->st_mtimespec.tv_sec = prstat->st_mtim_sec;
+   stbuf->st_ctimespec.tv_sec = prstat->st_ctim_sec;
+   stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec;
+   stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec;
+#else
+   stbuf->st_atim.tv_sec = prstat->st_atim_sec;
+   stbuf->st_mtim.tv_sec = prstat->st_mtim_sec;
+   stbuf->st_ctim.tv_sec = prstat->st_ctim_sec;
+   stbuf->st_atim.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec;
   stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec;
+#endif
 }

 /*
@ -688,7 +706,21 @@ static off_t proxy_telldir(FsContext *ctx, V9fsFidOpenState *fs)

 static struct dirent *proxy_readdir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-    return readdir(fs->dir.stream);
+    struct dirent *entry;
+    entry = readdir(fs->dir.stream);
+#ifdef CONFIG_DARWIN
+    if (!entry) {
+        return NULL;
+    }
+    int td;
+    td = telldir(fs->dir.stream);
+    /* If telldir fails, fail the entire readdir call */
+    if (td < 0) {
+        return NULL;
+    }
+    entry->d_seekoff = td;
+#endif
+    return entry;
 }

 static void proxy_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off)
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@ -234,7 +234,11 @@ static void synth_direntry(V9fsSynthNode *node,
             offsetof(struct dirent, d_name) + sz);
    memcpy(entry->d_name, node->name, sz);
    entry->d_ino = node->attr->inode;
+#ifdef CONFIG_DARWIN
+    entry->d_seekoff = off + 1;
+#else
    entry->d_off = off + 1;
+#endif
 }

 static struct dirent *synth_get_dentry(V9fsSynthNode *dir,
@ -439,7 +443,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path,
    stbuf->f_bsize = 512;
    stbuf->f_blocks = 0;
    stbuf->f_files = synth_node_count;
+#ifndef CONFIG_DARWIN
    stbuf->f_namelen = NAME_MAX;
+#endif
    return 0;
 }

--- a/hw/9pfs/9p-util-darwin.c
+++ b/hw/9pfs/9p-util-darwin.c
@ -0,0 +1,97 @@
+/*
+ * 9p utilities (Darwin Implementation)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "9p-util.h"
+
+/*
+ * Read extended attribute @name of @filename (resolved relative to @dirfd)
+ * into @value (at most @size bytes) without following a symlink.
+ *
+ * The file is opened with O_NOFOLLOW through openat_file() and the attribute
+ * is read from the resulting descriptor with Darwin's fgetxattr(), whose two
+ * trailing arguments (position, options) are passed as 0.
+ *
+ * Returns the result of fgetxattr(), or -1 if the file could not be opened.
+ * errno is kept intact while the temporary descriptor is closed.
+ */
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                             void *value, size_t size)
+{
+    /* ssize_t rather than int: matches fgetxattr() and our own return type */
+    ssize_t ret;
+    int fd = openat_file(dirfd, filename,
+                         O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+    if (fd == -1) {
+        return -1;
+    }
+    ret = fgetxattr(fd, name, value, size, 0, 0);
+    close_preserve_errno(fd);
+    return ret;
+}
+
+/*
+ * List the extended attribute names of @filename (resolved relative to
+ * @dirfd) into @list (at most @size bytes) without following a symlink.
+ *
+ * The file is opened with O_NOFOLLOW through openat_file() and queried via
+ * Darwin's flistxattr(), whose trailing options argument is passed as 0.
+ *
+ * Returns the result of flistxattr(), or -1 if the file could not be opened.
+ * errno is kept intact while the temporary descriptor is closed.
+ */
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+                              char *list, size_t size)
+{
+    /* ssize_t rather than int: matches flistxattr() and our own return type */
+    ssize_t ret;
+    int fd = openat_file(dirfd, filename,
+                         O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+    if (fd == -1) {
+        return -1;
+    }
+    ret = flistxattr(fd, list, size, 0);
+    close_preserve_errno(fd);
+    return ret;
+}
+
+/*
+ * Remove extended attribute @name from @filename (resolved relative to
+ * @dirfd) without following a symlink. Works on a temporarily opened
+ * descriptor; returns -1 if that open fails, else fremovexattr()'s result.
+ */
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+                                const char *name)
+{
+    int fd, rc;
+
+    fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+    if (fd != -1) {
+        rc = fremovexattr(fd, name, 0);
+        close_preserve_errno(fd);
+        return rc;
+    }
+    return -1;
+}
+
+/*
+ * Set extended attribute @name of @filename (resolved relative to @dirfd)
+ * to the @size bytes at @value without following a symlink. @flags is
+ * handed to fsetxattr() as its options argument.
+ */
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                         void *value, size_t size, int flags)
+{
+    const int open_flags = O_PATH_9P_UTIL | O_NOFOLLOW;
+    int fd = openat_file(dirfd, filename, open_flags, 0);
+    int rc = -1;
+
+    if (fd != -1) {
+        /* The 0 is fsetxattr()'s position argument, preceding the options */
+        rc = fsetxattr(fd, name, value, size, 0, flags);
+        close_preserve_errno(fd);
+    }
+    return rc;
+}
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed.
+ *
+ * Radar filed with Apple for implementing mknodat:
+ * rdar://FB9862426 (https://openradar.appspot.com/FB9862426)
+ */
+#if defined CONFIG_PTHREAD_FCHDIR_NP
+
+/*
+ * Emulation of mknodat() for macOS: create node @filename relative to
+ * directory @dirfd by temporarily switching the calling thread's working
+ * directory with pthread_fchdir_np() and then calling plain mknod().
+ *
+ * Follows the mknodat() convention the callers rely on: the result of
+ * mknod() is returned, and every failure is reported as -1 with errno set
+ * (ENOTSUP if pthread_fchdir_np() is not available at runtime).
+ */
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+    int preserved_errno, err;
+    if (!pthread_fchdir_np) {
+        error_report_once("pthread_fchdir_np() not available on this version of macOS");
+        /* Callers test for -1 and read errno; never hand back -errno here */
+        errno = ENOTSUP;
+        return -1;
+    }
+    if (pthread_fchdir_np(dirfd) < 0) {
+        return -1;
+    }
+    err = mknod(filename, mode, dev);
+    preserved_errno = errno;
+    /* Stop using the thread-local cwd */
+    pthread_fchdir_np(-1);
+    if (err < 0) {
+        /* Report mknod()'s error, not whatever the cwd reset left behind */
+        errno = preserved_errno;
+    }
+    return err;
+}
+
+#endif
--- a/hw/9pfs/9p-util-linux.c
+++ b/hw/9pfs/9p-util-linux.c
@ -1,5 +1,5 @@
 /*
- * 9p utilities
+ * 9p utilities (Linux Implementation)
 *
 * Copyright IBM, Corp. 2017
 *
@ -61,4 +61,10 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
    ret = lsetxattr(proc_path, name, value, size, flags);
    g_free(proc_path);
    return ret;
+
+}
+
+/* Linux implementation: hands all arguments straight to mknodat(). */
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+    int rc = mknodat(dirfd, filename, mode, dev);
+
+    return rc;
 }
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@ -19,6 +19,23 @@
 #define O_PATH_9P_UTIL 0
 #endif

+#ifdef CONFIG_DARWIN
+#define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
+#define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW)
+#define qemu_llistxattr(...) listxattr(__VA_ARGS__, XATTR_NOFOLLOW)
+#define qemu_lremovexattr(...) removexattr(__VA_ARGS__, XATTR_NOFOLLOW)
+/*
+ * Darwin counterpart of lsetxattr(): calls setxattr() with a position of 0
+ * and XATTR_NOFOLLOW OR-ed into the caller supplied @flags.
+ */
+static inline int qemu_lsetxattr(const char *path, const char *name,
+                                 const void *value, size_t size, int flags)
+{
+    const int options = flags | XATTR_NOFOLLOW;
+
+    return setxattr(path, name, value, size, 0, options);
+}
+#else
+#define qemu_fgetxattr fgetxattr
+#define qemu_lgetxattr lgetxattr
+#define qemu_llistxattr llistxattr
+#define qemu_lremovexattr lremovexattr
+#define qemu_lsetxattr lsetxattr
+#endif
+
 static inline void close_preserve_errno(int fd)
 {
    int serrno = errno;
@ -37,10 +54,13 @@ static inline int openat_file(int dirfd, const char *name, int flags,
 {
    int fd, serrno, ret;

+#ifndef CONFIG_DARWIN
 again:
+#endif
    fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
                mode);
    if (fd == -1) {
+#ifndef CONFIG_DARWIN
        if (errno == EPERM && (flags & O_NOATIME)) {
            /*
             * The client passed O_NOATIME but we lack permissions to honor it.
@ -53,6 +73,7 @@ again:
            flags &= ~O_NOATIME;
            goto again;
        }
+#endif
        return -1;
    }

@ -78,4 +99,61 @@ ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
 ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
                                const char *name);

+/*
+ * Darwin has d_seekoff, which appears to function similarly to d_off.
+ * However, it does not appear to be supported on all file systems,
+ * so ensure it is manually injected earlier and call here when
+ * needed.
+ */
+static inline off_t qemu_dirent_off(struct dirent *dent)
+{
+    off_t pos;
+
+    /* Pick whichever offset field this host's struct dirent provides */
+#ifdef CONFIG_DARWIN
+    pos = dent->d_seekoff;
+#else
+    pos = dent->d_off;
+#endif
+    return pos;
+}
+
+/**
+ * qemu_dirent_dup() - Duplicate directory entry @dent.
+ *
+ * @dent: original directory entry to be duplicated
+ * Return: duplicated directory entry which should be freed with g_free()
+ *
+ * It is highly recommended to use this function instead of open coding
+ * duplication of dirent objects, because the actual struct dirent
+ * size may be bigger or shorter than sizeof(struct dirent) and correct
+ * handling is platform specific (see gitlab issue #841).
+ */
+static inline struct dirent *qemu_dirent_dup(struct dirent *dent)
+{
+    size_t len = 0;
+
+#if defined _DIRENT_HAVE_D_RECLEN
+    /* Where d_reclen exists it spares us the strlen() below. */
+    len = dent->d_reclen;
+#endif
+    /*
+     * The check is unconditional on purpose: d_reclen may be present
+     * and still be reported as zero by some drivers.
+     */
+    if (!len) {
+        /* Portable fallback: header up to d_name, the name, and its NUL. */
+        len = offsetof(struct dirent, d_name) + strlen(dent->d_name) + 1;
+    }
+    return g_memdup(dent, len);
+}
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed. qemu_mknodat is defined in
+ * 9p-util-darwin.c. pthread_fchdir_np is weakly linked here as a guard
+ * in case it disappears in future macOS versions, because it
+ * is a private API.
+ */
+#if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP
+int pthread_fchdir_np(int fd) __attribute__((weak_import));
+#endif
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev);
+
 #endif
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@ -27,12 +27,17 @@
 #include "virtio-9p.h"
 #include "fsdev/qemu-fsdev.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
 #include "coth.h"
 #include "trace.h"
 #include "migration/blocker.h"
 #include "qemu/xxhash.h"
 #include <math.h>
+#ifdef CONFIG_LINUX
 #include <linux/limits.h>
+#else
+#include <limits.h>
+#endif

 int open_fd_hw;
 int total_open_fd;
@ -133,11 +138,20 @@ static int dotl_to_open_flags(int flags)
        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
        { P9_DOTL_DSYNC, O_DSYNC },
        { P9_DOTL_FASYNC, FASYNC },
+#ifndef CONFIG_DARWIN
+        { P9_DOTL_NOATIME, O_NOATIME },
+        /*
+         *  On Darwin, we could map to F_NOCACHE, which is
+         *  similar, but doesn't quite have the same
+         *  semantics. However, we don't support O_DIRECT
+         *  even on linux at the moment, so we just ignore
+         *  it here.
+         */
        { P9_DOTL_DIRECT, O_DIRECT },
+#endif
        { P9_DOTL_LARGEFILE, O_LARGEFILE },
        { P9_DOTL_DIRECTORY, O_DIRECTORY },
        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
-        { P9_DOTL_NOATIME, O_NOATIME },
        { P9_DOTL_SYNC, O_SYNC },
    };

@ -166,10 +180,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags)
     */
    flags = dotl_to_open_flags(oflags);
    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
+#ifndef CONFIG_DARWIN
    /*
     * Ignore direct disk access hint until the server supports it.
     */
    flags &= ~O_DIRECT;
+#endif
    return flags;
 }

@ -612,8 +628,8 @@ static inline uint64_t mirror64bit(uint64_t value)
           ((uint64_t)mirror8bit((value >> 56) & 0xff));
 }

-/**
- * @brief Parameter k for the Exponential Golomb algorihm to be used.
+/*
+ * Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count for the Exp.
 * Golomb generated affixes will be (at lowest index) however for the
@ -626,28 +642,30 @@ static inline uint64_t mirror64bit(uint64_t value)
 * should be small, for a large amount of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
- * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
+ * IMPORTANT: In case this ever becomes a runtime parameter; the value of
 * k should not change as long as guest is still running! Because that would
 * cause completely different inode numbers to be generated on guest.
 */
 #define EXP_GOLOMB_K    0

 /**
- * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
+ * expGolombEncode() - Exponential Golomb algorithm for arbitrary k
+ *                     (including k=0).
 *
- * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
+ * @n: natural number (or index) of the prefix to be generated
+ *     (1, 2, 3, ...)
+ * @k: parameter k of Exp. Golomb algorithm to be used
+ *     (see comment on EXP_GOLOMB_K macro for details about k)
+ * Return: prefix for given @n and @k
+ *
+ * The Exponential Golomb algorithm generates prefixes (NOT suffixes!)
 * with growing length and with the mathematical property of being
 * "prefix-free". The latter means the generated prefixes can be prepended
 * in front of arbitrary numbers and the resulting concatenated numbers are
 * guaranteed to be always unique.
 *
 * This is a minor adjustment to the original Exp. Golomb algorithm in the
- * sense that lowest allowed index (@param n) starts with 1, not with zero.
- *
- * @param n - natural number (or index) of the prefix to be generated
- *            (1, 2, 3, ...)
- * @param k - parameter k of Exp. Golomb algorithm to be used
- *            (see comment on EXP_GOLOMB_K macro for details about k)
+ * sense that lowest allowed index (@n) starts with 1, not with zero.
 */
 static VariLenAffix expGolombEncode(uint64_t n, int k)
 {
@ -661,7 +679,9 @@ static VariLenAffix expGolombEncode(uint64_t n, int k)
 }

 /**
- * @brief Converts a suffix into a prefix, or a prefix into a suffix.
+ * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix.
+ * @affix: either suffix or prefix to be inverted
+ * Return: inversion of passed @affix
 *
 * Simply mirror all bits of the affix value, for the purpose to preserve
 * respectively the mathematical "prefix-free" or "suffix-free" property
@ -685,16 +705,16 @@ static VariLenAffix invertAffix(const VariLenAffix *affix)
 }

 /**
- * @brief Generates suffix numbers with "suffix-free" property.
+ * affixForIndex() - Generates suffix numbers with "suffix-free" property.
+ * @index: natural number (or index) of the suffix to be generated
+ *         (1, 2, 3, ...)
+ * Return: Suffix suitable to assemble unique number.
 *
 * This is just a wrapper function on top of the Exp. Golomb algorithm.
 *
 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
 * this function converts the Exp. Golomb prefixes into appropriate suffixes
 * which are still suitable for generating unique numbers.
- *
- * @param n - natural number (or index) of the suffix to be generated
- *            (1, 2, 3, ...)
 */
 static VariLenAffix affixForIndex(uint64_t index)
 {
@ -794,8 +814,8 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
    return val->prefix_bits;
 }

-/**
- * @brief Slow / full mapping host inode nr -> guest inode nr.
+/*
+ * Slow / full mapping host inode nr -> guest inode nr.
 *
 * This function performs a slower and much more costly remapping of an
 * original file inode number on host to an appropriate different inode
@ -807,7 +827,7 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 * expected ever to be used at all though.
 *
- * @see qid_path_suffixmap() for details
+ * See qid_path_suffixmap() for details
 *
 */
 static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
@ -848,8 +868,8 @@ static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
    return 0;
 }

-/**
- * @brief Quick mapping host inode nr -> guest inode nr.
+/*
+ * Quick mapping host inode nr -> guest inode nr.
 *
 * This function performs quick remapping of an original file inode number
 * on host to an appropriate different inode number on guest. This remapping
@ -1265,12 +1285,15 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,


 /**
- * Convert host filesystem's block size into an appropriate block size for
- * 9p client (guest OS side). The value returned suggests an "optimum" block
- * size for 9p I/O, i.e. to maximize performance.
+ * blksize_to_iounit() - Block size exposed to 9p client.
+ * Return: block size
 *
 * @pdu: 9p client request
 * @blksize: host filesystem's block size
+ *
+ * Convert host filesystem's block size into an appropriate block size for
+ * 9p client (guest OS side). The value returned suggests an "optimum" block
+ * size for 9p I/O, i.e. to maximize performance.
 */
 static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize)
 {
@ -1309,11 +1332,17 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
    v9lstat->st_blksize = stat_to_iounit(pdu, stbuf);
    v9lstat->st_blocks = stbuf->st_blocks;
    v9lstat->st_atime_sec = stbuf->st_atime;
-    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
    v9lstat->st_mtime_sec = stbuf->st_mtime;
-    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
    v9lstat->st_ctime_sec = stbuf->st_ctime;
+#ifdef CONFIG_DARWIN
+    v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec;
+    v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec;
+    v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec;
+#else
+    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
+    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
+#endif
    /* Currently we only support BASIC fields in stat */
    v9lstat->st_result_mask = P9_STATS_BASIC;

@ -2271,7 +2300,7 @@ static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
        count += len;
        v9fs_stat_free(&v9stat);
        v9fs_path_free(&path);
-        saved_dir_pos = dent->d_off;
+        saved_dir_pos = qemu_dirent_off(dent);
    }

    v9fs_readdir_unlock(&fidp->fs.dir);
@ -2376,10 +2405,11 @@ out_nofid:
 }

 /**
- * Returns size required in Rreaddir response for the passed dirent @p name.
+ * v9fs_readdir_response_size() - Returns size required in Rreaddir response
+ * for the passed dirent @name.
 *
- * @param name - directory entry's name (i.e. file name, directory name)
- * @returns required size in bytes
+ * @name: directory entry's name (i.e. file name, directory name)
+ * Return: required size in bytes
 */
 size_t v9fs_readdir_response_size(V9fsString *name)
 {
@ -2410,6 +2440,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
    V9fsString name;
    int len, err = 0;
    int32_t count = 0;
+    off_t off;
    struct dirent *dent;
    struct stat *st;
    struct V9fsDirEnt *entries = NULL;
@ -2470,12 +2501,13 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
            qid.version = 0;
        }

+        off = qemu_dirent_off(dent);
        v9fs_string_init(&name);
        v9fs_string_sprintf(&name, "%s", dent->d_name);

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "Qqbs",
-                          &qid, dent->d_off,
+                          &qid, off,
                          dent->d_type, &name);

        v9fs_string_free(&name);
@ -3515,9 +3547,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
    f_bavail = stbuf->f_bavail / bsize_factor;
    f_files  = stbuf->f_files;
    f_ffree  = stbuf->f_ffree;
+#ifdef CONFIG_DARWIN
+    fsid_val = (unsigned int)stbuf->f_fsid.val[0] |
+               (unsigned long long)stbuf->f_fsid.val[1] << 32;
+    f_namelen = NAME_MAX;
+#else
    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
    f_namelen = stbuf->f_namelen;
+#endif

    return pdu_marshal(pdu, offset, "ddqqqqqqd",
                       f_type, f_bsize, f_blocks, f_bfree,
@ -3919,7 +3957,7 @@ static void coroutine_fn v9fs_xattrcreate(void *opaque)
        rflags |= XATTR_REPLACE;
    }

-    if (size > XATTR_SIZE_MAX) {
+    if (size > P9_XATTR_SIZE_MAX) {
        err = -E2BIG;
        goto out_nofid;
    }
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@ -100,8 +100,8 @@ typedef enum P9ProtoVersion {
    V9FS_PROTO_2000L = 0x02,
 } P9ProtoVersion;

-/**
- * @brief Minimum message size supported by this 9pfs server.
+/*
+ * Minimum message size supported by this 9pfs server.
 *
 * A client establishes a session by sending a Tversion request along with a
 * 'msize' parameter which suggests the server a maximum message size ever to be
@ -231,7 +231,7 @@ static inline void v9fs_readdir_init(P9ProtoVersion proto_version, V9fsDir *dir)
    }
 }

-/**
+/*
 * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests,
 * which is a chained list of directory entries.
 */
@ -289,8 +289,8 @@ typedef enum AffixType_t {
    AffixType_Suffix, /* A.k.a. postfix. */
 } AffixType_t;

-/**
- * @brief Unique affix of variable length.
+/*
+ * Unique affix of variable length.
 *
 * An affix is (currently) either a suffix or a prefix, which is either
 * going to be prepended (prefix) or appended (suffix) with some other
@ -304,7 +304,7 @@ typedef struct VariLenAffix {
    AffixType_t type; /* Whether this affix is a suffix or a prefix. */
    uint64_t value; /* Actual numerical value of this affix. */
    /*
-     * Lenght of the affix, that is how many (of the lowest) bits of @c value
+     * Length of the affix, that is how many (of the lowest) bits of ``value``
     * must be used for appending/prepending this affix to its final resulting,
     * unique number.
     */
@ -479,4 +479,22 @@ struct V9fsTransport {
    void        (*push_and_notify)(V9fsPDU *pdu);
 };

+#if defined(XATTR_SIZE_MAX)
+/* Linux */
+#define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX
+#elif defined(CONFIG_DARWIN)
+/*
+ * Darwin doesn't seem to define a maximum xattr size in its user
+ * space header, so manually configure it across platforms as 64k.
+ *
+ * Having no limit at all can lead to QEMU crashing during large g_malloc()
+ * calls. Because QEMU does not currently support macOS guests, the below
+ * preliminary solution only works due to its being a reflection of the limit of
+ * Linux guests.
+ */
+#define P9_XATTR_SIZE_MAX 65536
+#else
+#error Missing definition for P9_XATTR_SIZE_MAX for this host system
+#endif
+
 #endif
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -22,6 +22,8 @@
 #include "qemu/coroutine.h"
 #include "qemu/main-loop.h"
 #include "coth.h"
+#include "9p-xattr.h"
+#include "9p-util.h"

 /*
 * Intended to be called from bottom-half (e.g. background I/O thread)
@@ -166,7 +168,7 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
        }

        size += len;
-        saved_dir_pos = dent->d_off;
+        saved_dir_pos = qemu_dirent_off(dent);
    }

    /* restore (last) saved position */
@@ -182,14 +184,25 @@ out:
 }

 /**
- * @brief Reads multiple directory entries in one rush.
+ * v9fs_co_readdir_many() - Reads multiple directory entries in one rush.
+ *
+ * @pdu: the causing 9p (T_readdir) client request
+ * @fidp: already opened directory where readdir shall be performed on
+ * @entries: output for directory entries (must not be NULL)
+ * @offset: initial position inside the directory the function shall
+ *          seek to before retrieving the directory entries
+ * @maxsize: maximum result message body size (in bytes)
+ * @dostat: whether a stat() should be performed and returned for
+ *          each directory entry
+ * Return: resulting response message body size (in bytes) on success,
+ *         negative error code otherwise
 *
 * Retrieves the requested (max. amount of) directory entries from the fs
 * driver. This function must only be called by the main IO thread (top half).
 * Internally this function call will be dispatched to a background IO thread
 * (bottom half) where it is eventually executed by the fs driver.
 *
- * @discussion Acquiring multiple directory entries in one rush from the fs
+ * Acquiring multiple directory entries in one rush from the fs
 * driver, instead of retrieving each directory entry individually, is very
 * beneficial from performance point of view. Because for every fs driver
 * request latency is added, which in practice could lead to overall
@@ -197,20 +210,9 @@ out:
 * directory) if every directory entry was individually requested from fs
 * driver.
 *
- * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling
+ * NOTE: You must ALWAYS call v9fs_free_dirents(entries) after calling
 * v9fs_co_readdir_many(), both on success and on error cases of this
- * function, to avoid memory leaks once @p entries are no longer needed.
- *
- * @param pdu - the causing 9p (T_readdir) client request
- * @param fidp - already opened directory where readdir shall be performed on
- * @param entries - output for directory entries (must not be NULL)
- * @param offset - initial position inside the directory the function shall
- *                 seek to before retrieving the directory entries
- * @param maxsize - maximum result message body size (in bytes)
- * @param dostat - whether a stat() should be performed and returned for
- *                 each directory entry
- * @returns resulting response message body size (in bytes) on success,
- *          negative error code otherwise
+ * function, to avoid memory leaks once @entries are no longer needed.
 */
 int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
                                      struct V9fsDirEnt **entries,
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -19,7 +19,7 @@
 #include "qemu/coroutine.h"
 #include "9p.h"

-/**
+/*
 * we want to use bottom half because we want to make sure the below
 * sequence of events.
 *
@@ -29,7 +29,7 @@
 * we cannot swap step 1 and 2, because that would imply worker thread
 * can enter coroutine while step1 is still running
 *
- * @b PERFORMANCE @b CONSIDERATIONS: As a rule of thumb, keep in mind
+ * PERFORMANCE CONSIDERATIONS: As a rule of thumb, keep in mind
 * that hopping between threads adds @b latency! So when handling a
 * 9pfs request, avoid calling v9fs_co_run_in_worker() too often, because
 * this might otherwise sum up to a significant, huge overall latency for
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -4,7 +4,6 @@ fs_ss.add(files(
  '9p-posix-acl.c',
  '9p-proxy.c',
  '9p-synth.c',
-  '9p-util.c',
  '9p-xattr-user.c',
  '9p-xattr.c',
  '9p.c',
@@ -14,6 +13,8 @@ fs_ss.add(files(
  'coth.c',
  'coxattr.c',
 ))
+fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
+fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
 fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
 softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)

--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -673,19 +673,6 @@ static inline int platform_does_not_support_system(const char *command)
 }
 #endif /* !HAVE_SYSTEM_FUNCTION */

-/**
- * Duplicate directory entry @dent.
- *
- * It is highly recommended to use this function instead of open coding
- * duplication of @c dirent objects, because the actual @c struct @c dirent
- * size may be bigger or shorter than @c sizeof(struct dirent) and correct
- * handling is platform specific (see gitlab issue #841).
- *
- * @dent - original directory entry to be duplicated
- * @returns duplicated directory entry which should be freed with g_free()
- */
-struct dirent *qemu_dirent_dup(struct dirent *dent);
-
 #ifdef __cplusplus
 }
 #endif
--- a/include/qemu/xattr.h
+++ b/include/qemu/xattr.h
@@ -22,7 +22,9 @@
 #ifdef CONFIG_LIBATTR
 #  include <attr/xattr.h>
 #else
-#  define ENOATTR ENODATA
+#  if !defined(ENOATTR)
+#    define ENOATTR ENODATA
+#  endif
 #  include <sys/xattr.h>
 #endif

--- a/meson.build
+++ b/meson.build
@@ -1462,14 +1462,16 @@ dbus_display = get_option('dbus_display') \
  .allowed()

 have_virtfs = get_option('virtfs') \
-    .require(targetos == 'linux',
-             error_message: 'virtio-9p (virtfs) requires Linux') \
-    .require(libattr.found() and libcap_ng.found(),
-             error_message: 'virtio-9p (virtfs) requires libcap-ng-devel and libattr-devel') \
+    .require(targetos == 'linux' or targetos == 'darwin',
+             error_message: 'virtio-9p (virtfs) requires Linux or macOS') \
+    .require(targetos == 'linux' or cc.has_function('pthread_fchdir_np'),
+             error_message: 'virtio-9p (virtfs) on macOS requires the presence of pthread_fchdir_np') \
+    .require(targetos == 'darwin' or (libattr.found() and libcap_ng.found()),
+             error_message: 'virtio-9p (virtfs) on Linux requires libcap-ng-devel and libattr-devel') \
    .disable_auto_if(not have_tools and not have_system) \
    .allowed()

-have_virtfs_proxy_helper = have_virtfs and have_tools
+have_virtfs_proxy_helper = targetos != 'darwin' and have_virtfs and have_tools

 foreach k : get_option('trace_backends')
  config_host_data.set('CONFIG_TRACE_' + k.to_upper(), true)
@@ -1622,6 +1624,7 @@ config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate'
 config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign'))
 config_host_data.set('CONFIG_PPOLL', cc.has_function('ppoll'))
 config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: '#include <sys/uio.h>'))
+config_host_data.set('CONFIG_PTHREAD_FCHDIR_NP', cc.has_function('pthread_fchdir_np'))
 config_host_data.set('CONFIG_SEM_TIMEDWAIT', cc.has_function('sem_timedwait', dependencies: threads))
 config_host_data.set('CONFIG_SENDFILE', cc.has_function('sendfile'))
 config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_function('unshare'))
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -1253,7 +1253,7 @@ static void fs_unlinkat_dir(void *obj, void *data, QGuestAllocator *t_alloc)
    /* ... and is actually a directory */
    g_assert((st.st_mode & S_IFMT) == S_IFDIR);

-    do_unlinkat(v9p, "/", "02", AT_REMOVEDIR);
+    do_unlinkat(v9p, "/", "02", P9_DOTL_AT_REMOVEDIR);
    /* directory should be gone now */
    g_assert(stat(new_dir, &st) != 0);
 }
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -33,7 +33,6 @@
 extern int madvise(char *, size_t, int);
 #endif

-#include <dirent.h>
 #include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/sockets.h"
@@ -619,23 +618,3 @@ writev(int fd, const struct iovec *iov, int iov_cnt)
    return readv_writev(fd, iov, iov_cnt, true);
 }
 #endif
-
-struct dirent *
-qemu_dirent_dup(struct dirent *dent)
-{
-    size_t sz = 0;
-#if defined _DIRENT_HAVE_D_RECLEN
-    /* Avoid use of strlen() if platform supports d_reclen. */
-    sz = dent->d_reclen;
-#endif
-    /*
-     * Test sz for zero even if d_reclen is available
-     * because some drivers may set d_reclen to zero.
-     */
-    if (sz == 0) {
-        /* Fallback to the most portable way. */
-        sz = offsetof(struct dirent, d_name) +
-                      strlen(dent->d_name) + 1;
-    }
-    return g_memdup(dent, sz);
-}