9pfs: T_readdir latency optimization

Make top half really top half and bottom half really bottom half:

Handling of each T_readdir request currently hops between threads (main
I/O thread and background I/O driver threads) several times for
every individual directory entry, which adds up to huge latencies
for handling just a single T_readdir request.

Instead of doing that, now collect all required directory entries
(including all potentially required stat buffers for each entry) in
one go on a background I/O thread from the fs driver by calling the
previously added function v9fs_co_readdir_many() instead of
v9fs_co_readdir(), then assemble the entire resulting network
response message for the readdir request on the main I/O thread. The
fs driver still aborts the directory entry retrieval loop
(on the background I/O thread inside of v9fs_co_readdir_many())
as soon as it would exceed the client's requested maximum R_readdir
response size, so this does not introduce a performance penalty
elsewhere.

Also: no longer seek the initial directory position in v9fs_readdir(),
as this is now handled (more consistently) by
v9fs_co_readdir_many() instead.

Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Message-Id: <c7c3d1cf4e86611538cef44897842819d9359d7a.1596012787.git.qemu_oss@crudebyte.com>
Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
This commit is contained in:
Christian Schoenebeck 2020-07-29 10:13:05 +02:00
parent 2149675b19
commit 0c4356ba7d
1 changed files with 58 additions and 74 deletions

View File

@ -972,30 +972,6 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
return 0;
}
/*
 * Produce a QID for one directory entry.
 *
 * The steps run in order and stop at the first one that reports a
 * negative value:
 *   1. v9fs_co_name_to_path() with the fid's path and dent->d_name,
 *   2. v9fs_co_lstat() on the resulting path,
 *   3. stat_to_qid() on the stat buffer, storing the result in *qidp.
 *
 * The temporary path is released on every exit route.
 *
 * Returns the negative value of the first failing step, otherwise
 * whatever stat_to_qid() returned.
 */
static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
                                      struct dirent *dent, V9fsQID *qidp)
{
    V9fsPath entry_path;
    struct stat entry_stat;
    int ret;

    v9fs_path_init(&entry_path);

    ret = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &entry_path);
    if (ret >= 0) {
        ret = v9fs_co_lstat(pdu, &entry_path, &entry_stat);
    }
    if (ret >= 0) {
        ret = stat_to_qid(pdu, &entry_stat, qidp);
    }

    v9fs_path_free(&entry_path);
    return ret;
}
V9fsPDU *pdu_alloc(V9fsState *s)
{
V9fsPDU *pdu = NULL;
@ -2328,62 +2304,74 @@ size_t v9fs_readdir_response_size(V9fsString *name)
return 24 + v9fs_string_size(name);
}
/*
 * Release a whole singly linked list of directory entries.
 *
 * For every node reachable from @e through ->next, the node's dent and
 * st buffers are handed to g_free(), followed by the node itself.
 * Calling this with a NULL list head does nothing.
 */
static void v9fs_free_dirents(struct V9fsDirEnt *e)
{
    while (e) {
        /* grab the successor first: the node is gone after g_free(e) */
        struct V9fsDirEnt *following = e->next;

        g_free(e->dent);
        g_free(e->st);
        g_free(e);

        e = following;
    }
}
static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
int32_t max_count)
off_t offset, int32_t max_count)
{
size_t size;
V9fsQID qid;
V9fsString name;
int len, err = 0;
int32_t count = 0;
off_t saved_dir_pos;
struct dirent *dent;
struct stat *st;
struct V9fsDirEnt *entries = NULL;
/* save the directory position */
saved_dir_pos = v9fs_co_telldir(pdu, fidp);
if (saved_dir_pos < 0) {
return saved_dir_pos;
/*
* inode remapping requires the device id, which in turn might be
* different for different directory entries, so if inode remapping is
* enabled we have to make a full stat for each directory entry
*/
const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
/*
* Fetch all required directory entries altogether on a background IO
* thread from fs driver. We don't want to do that for each entry
* individually, because hopping between threads (this main IO thread
* and background IO driver thread) would sum up to huge latencies.
*/
count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
dostat);
if (count < 0) {
err = count;
count = 0;
goto out;
}
count = 0;
while (1) {
v9fs_readdir_lock(&fidp->fs.dir);
err = v9fs_co_readdir(pdu, fidp, &dent);
if (err || !dent) {
break;
}
v9fs_string_init(&name);
v9fs_string_sprintf(&name, "%s", dent->d_name);
if ((count + v9fs_readdir_response_size(&name)) > max_count) {
v9fs_readdir_unlock(&fidp->fs.dir);
/* Ran out of buffer. Set dir back to old position and return */
v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
v9fs_string_free(&name);
return count;
}
for (struct V9fsDirEnt *e = entries; e; e = e->next) {
dent = e->dent;
if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
/*
* dirent_to_qid() implies expensive stat call for each entry,
* we must do that here though since inode remapping requires
* the device id, which in turn might be different for
* different entries; we cannot make any assumption to avoid
* that here.
*/
err = dirent_to_qid(pdu, fidp, dent, &qid);
st = e->st;
/* e->st should never be NULL, but just to be sure */
if (!st) {
err = -1;
break;
}
/* remap inode */
err = stat_to_qid(pdu, st, &qid);
if (err < 0) {
v9fs_readdir_unlock(&fidp->fs.dir);
v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
v9fs_string_free(&name);
return err;
break;
}
} else {
/*
* Fill up just the path field of qid because the client uses
* only that. To fill the entire qid structure we will have
* to stat each dirent found, which is expensive. For the
* latter reason we don't call dirent_to_qid() here. Only drawback
* latter reason we don't call stat_to_qid() here. Only drawback
* is that no multi-device export detection of stat_to_qid()
* would be done and provided as error to the user here. But
* user would get that error anyway when accessing those
@ -2396,25 +2384,26 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
qid.version = 0;
}
v9fs_string_init(&name);
v9fs_string_sprintf(&name, "%s", dent->d_name);
/* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
len = pdu_marshal(pdu, 11 + count, "Qqbs",
&qid, dent->d_off,
dent->d_type, &name);
v9fs_readdir_unlock(&fidp->fs.dir);
v9fs_string_free(&name);
if (len < 0) {
v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
v9fs_string_free(&name);
return len;
err = len;
break;
}
count += len;
v9fs_string_free(&name);
saved_dir_pos = dent->d_off;
}
v9fs_readdir_unlock(&fidp->fs.dir);
out:
v9fs_free_dirents(entries);
if (err < 0) {
return err;
}
@ -2457,12 +2446,7 @@ static void coroutine_fn v9fs_readdir(void *opaque)
retval = -EINVAL;
goto out;
}
if (initial_offset == 0) {
v9fs_co_rewinddir(pdu, fidp);
} else {
v9fs_co_seekdir(pdu, fidp, initial_offset);
}
count = v9fs_do_readdir(pdu, fidp, max_count);
count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
if (count < 0) {
retval = count;
goto out;