afs: Provide mount-time configurable byte-range file locking emulation

Provide byte-range file locking emulation that can be configured at mount
time to one of four modes:

 (1) flock=local.  Locking is done locally only and no reference is made to
     the server.

 (2) flock=openafs.  Byte-range locking is done locally only; whole-file
     locking is done with reference to the server.  Whole-file locks cannot
     be upgraded unless the client holds an exclusive lock.

 (3) flock=strict.  Byte-range and whole-file locking both require a
     sufficient whole-file lock on the server.

 (4) flock=write.  As strict, but the client always gets an exclusive
     whole-file lock on the server.

Signed-off-by: David Howells <dhowells@redhat.com>
This commit is contained in:
David Howells 2019-04-25 14:26:52 +01:00
parent 80548b0399
commit 6c6c1d63c2
6 changed files with 120 additions and 9 deletions

View File

@ -409,7 +409,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode)
* whether we think that we have a locking permit.
*/
static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
afs_lock_type_t type, bool can_sleep)
enum afs_flock_mode mode, afs_lock_type_t type)
{
afs_access_t access;
int ret;
@ -437,13 +437,9 @@ static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
if (type == AFS_LOCK_READ) {
if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
return -EACCES;
if (vnode->status.lock_count == -1 && !can_sleep)
return -EAGAIN; /* Write locked */
} else {
if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
return -EACCES;
if (vnode->status.lock_count != 0 && !can_sleep)
return -EAGAIN; /* Locked */
}
return 0;
@ -456,24 +452,48 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
{
struct inode *inode = locks_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
enum afs_flock_mode mode = AFS_FS_S(inode->i_sb)->flock_mode;
afs_lock_type_t type;
struct key *key = afs_file_key(file);
bool partial, no_server_lock = false;
int ret;
_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
if (mode == afs_flock_mode_unset)
mode = afs_flock_mode_openafs;
_enter("{%llx:%llu},%llu-%llu,%u,%u",
vnode->fid.vid, vnode->fid.vnode,
fl->fl_start, fl->fl_end, fl->fl_type, mode);
fl->fl_ops = &afs_lock_ops;
INIT_LIST_HEAD(&fl->fl_u.afs.link);
fl->fl_u.afs.state = AFS_LOCK_PENDING;
partial = (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX);
type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
if (mode == afs_flock_mode_write && partial)
type = AFS_LOCK_WRITE;
ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
ret = afs_do_setlk_check(vnode, key, mode, type);
if (ret < 0)
return ret;
trace_afs_flock_op(vnode, fl, afs_flock_op_set_lock);
/* AFS3 protocol only supports full-file locks and doesn't provide any
* method of upgrade/downgrade, so we need to emulate for partial-file
* locks.
*
* The OpenAFS client only gets a server lock for a full-file lock and
* keeps partial-file locks local. Allow this behaviour to be emulated
* (as the default).
*/
if (mode == afs_flock_mode_local ||
(partial && mode == afs_flock_mode_openafs)) {
no_server_lock = true;
goto skip_server_lock;
}
spin_lock(&vnode->lock);
list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
@ -502,6 +522,18 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
}
}
if (vnode->lock_state == AFS_VNODE_LOCK_NONE &&
!(fl->fl_flags & FL_SLEEP)) {
ret = -EAGAIN;
if (type == AFS_LOCK_READ) {
if (vnode->status.lock_count == -1)
goto lock_is_contended; /* Write locked */
} else {
if (vnode->status.lock_count != 0)
goto lock_is_contended; /* Locked */
}
}
if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
goto need_to_wait;
@ -571,6 +603,7 @@ vnode_is_locked:
/* the lock has been granted by the server... */
ASSERTCMP(fl->fl_u.afs.state, ==, AFS_LOCK_GRANTED);
skip_server_lock:
/* ... but the VFS still needs to distribute access on this client. */
trace_afs_flock_ev(vnode, fl, afs_flock_vfs_locking, 0);
ret = locks_lock_file_wait(file, fl);
@ -649,6 +682,8 @@ vfs_rejected_lock:
* deal with.
*/
_debug("vfs refused %d", ret);
if (no_server_lock)
goto error;
spin_lock(&vnode->lock);
list_del_init(&fl->fl_u.afs.link);
afs_defer_unlock(vnode);

View File

@ -1902,7 +1902,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
*bp++ = htonl(type);
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
trace_afs_make_fs_calli(call, &vnode->fid, type);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}

View File

@ -36,11 +36,24 @@
struct pagevec;
struct afs_call;
/*
 * Policy used to emulate partial-file (byte-range) locks.  AFS3 itself only
 * provides whole-file locks and has no way to upgrade or downgrade one, so
 * the client has to choose how partial locks map onto server locks.
 */
enum afs_flock_mode {
	afs_flock_mode_unset	= 0,	/* No mode selected; afs_do_setlk() then uses openafs */
	afs_flock_mode_local	= 1,	/* Local locking only */
	afs_flock_mode_openafs	= 2,	/* Don't get server lock for a partial lock */
	afs_flock_mode_strict	= 3,	/* Always get a server lock for a partial lock */
	afs_flock_mode_write	= 4,	/* Get an exclusive server lock for a partial lock */
};
struct afs_fs_context {
bool force; /* T to force cell type */
bool autocell; /* T if set auto mount operation */
bool dyn_root; /* T if dynamic root */
bool no_cell; /* T if the source is "none" (for dynroot) */
enum afs_flock_mode flock_mode; /* Partial file-locking emulation mode */
afs_voltype_t type; /* type of volume requested */
unsigned int volnamesz; /* size of volume name */
const char *volname; /* name of volume to mount */
@ -221,6 +234,7 @@ struct afs_super_info {
struct net *net_ns; /* Network namespace */
struct afs_cell *cell; /* The cell in which the volume resides */
struct afs_volume *volume; /* volume record */
enum afs_flock_mode flock_mode:8; /* File locking emulation mode */
bool dyn_root; /* True if dynamic root */
};

View File

@ -67,19 +67,30 @@ static atomic_t afs_count_active_inodes;
/* Identifiers for the mount parameters listed in afs_param_specs[]. */
enum afs_param {
	Opt_autocell,	/* "autocell" flag */
	Opt_dyn,	/* "dyn" flag */
	Opt_flock,	/* "flock" enumerated option */
	Opt_source,	/* "source" string */
};
/*
 * Mount parameter table: pairs each option keyword with its Opt_* identifier
 * and declares what kind of value it takes (flag, enum or string).
 */
static const struct fs_parameter_spec afs_param_specs[] = {
fsparam_flag ("autocell", Opt_autocell),
fsparam_flag ("dyn", Opt_dyn),
fsparam_enum ("flock", Opt_flock),
fsparam_string("source", Opt_source),
{}
};
/*
 * Accepted values for the "flock" option, each paired with the
 * afs_flock_mode value it selects.
 */
static const struct fs_parameter_enum afs_param_enums[] = {
{ Opt_flock, "local", afs_flock_mode_local },
{ Opt_flock, "openafs", afs_flock_mode_openafs },
{ Opt_flock, "strict", afs_flock_mode_strict },
{ Opt_flock, "write", afs_flock_mode_write },
{}
};
/*
 * Parameter description for kAFS: ties the option spec table to the table
 * of enumerated option values.
 */
static const struct fs_parameter_description afs_fs_parameters = {
.name = "kAFS",
.specs = afs_param_specs,
.enums = afs_param_enums,
};
/*
@ -182,11 +193,22 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
static int afs_show_options(struct seq_file *m, struct dentry *root)
{
struct afs_super_info *as = AFS_FS_S(root->d_sb);
const char *p = NULL;
if (as->dyn_root)
seq_puts(m, ",dyn");
if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags))
seq_puts(m, ",autocell");
switch (as->flock_mode) {
case afs_flock_mode_unset: break;
case afs_flock_mode_local: p = "local"; break;
case afs_flock_mode_openafs: p = "openafs"; break;
case afs_flock_mode_strict: p = "strict"; break;
case afs_flock_mode_write: p = "write"; break;
}
if (p)
seq_printf(m, ",flock=%s", p);
return 0;
}
@ -315,6 +337,10 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->dyn_root = true;
break;
case Opt_flock:
ctx->flock_mode = result.uint_32;
break;
default:
return -EINVAL;
}
@ -466,6 +492,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
if (as) {
as->net_ns = get_net(fc->net_ns);
as->flock_mode = ctx->flock_mode;
if (ctx->dyn_root) {
as->dyn_root = true;
} else {

View File

@ -1860,7 +1860,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
trace_afs_make_fs_calli(call, &vnode->fid, type);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}

View File

@ -539,6 +539,41 @@ TRACE_EVENT(afs_make_fs_call,
__print_symbolic(__entry->op, afs_fs_operations))
);
/*
 * Trace the issuing of a fileserver call along with one unsigned integer
 * argument.  The record holds the call's debug ID, the integer, the operation
 * ID and the target file ID.
 *
 * Printed as: c=<call> <vid>:<vnode>:<unique> <operation name> i=<i>
 */
TRACE_EVENT(afs_make_fs_calli,
TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
unsigned int i),
TP_ARGS(call, fid, i),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(unsigned int, i )
__field(enum afs_fs_operation, op )
__field_struct(struct afs_fid, fid )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->i = i;
__entry->op = call->operation_ID;
/* A NULL fid is tolerated: vid, vnode and unique are logged as zero. */
if (fid) {
__entry->fid = *fid;
} else {
__entry->fid.vid = 0;
__entry->fid.vnode = 0;
__entry->fid.unique = 0;
}
),
TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
__entry->fid.unique,
__print_symbolic(__entry->op, afs_fs_operations),
__entry->i)
);
TRACE_EVENT(afs_make_fs_call1,
TP_PROTO(struct afs_call *call, const struct afs_fid *fid,
const char *name),