a66d2c8f7e
Pull the big VFS changes from Al Viro: "This one is *big* and changes quite a few things around VFS. What's in there: - the first of two really major architecture changes - death to open intents. The former is finally there; it was very long in making, but with Miklos getting through really hard and messy final push in fs/namei.c, we finally have it. Unlike his variant, this one doesn't introduce struct opendata; what we have instead is ->atomic_open() taking preallocated struct file * and passing everything via its fields. Instead of returning struct file *, it returns -E... on error, 0 on success and 1 in "deal with it yourself" case (e.g. symlink found on server, etc.). See comments before fs/namei.c:atomic_open(). That made a lot of goodies finally possible and quite a few are in that pile: ->lookup(), ->d_revalidate() and ->create() do not get struct nameidata * anymore; ->lookup() and ->d_revalidate() get lookup flags instead, ->create() gets "do we want it exclusive" flag. With the introduction of new helper (kern_path_locked()) we are rid of all struct nameidata instances outside of fs/namei.c; it's still visible in namei.h, but not for long. Come the next cycle, declaration will move either to fs/internal.h or to fs/namei.c itself. [me, miklos, hch] - The second major change: behaviour of final fput(). Now we have __fput() done without any locks held by caller *and* not from deep in call stack. That obviously lifts a lot of constraints on the locking in there. Moreover, it's legal now to call fput() from atomic contexts (which has immediately simplified life for aio.c). We also don't need anti-recursion logics in __scm_destroy() anymore. There is a price, though - the damn thing has become partially asynchronous. For fput() from normal process we are guaranteed that pending __fput() will be done before the caller returns to userland, exits or gets stopped for ptrace. 
For kernel threads and atomic contexts it's done via schedule_work(), so theoretically we might need a way to make sure it's finished; so far only one such place had been found, but there might be more. There's flush_delayed_fput() (do all pending __fput()) and there's __fput_sync() (fput() analog doing __fput() immediately). I hope we won't need them often; see warnings in fs/file_table.c for details. [me, based on task_work series from Oleg merged last cycle] - sync series from Jan - large part of "death to sync_supers()" work from Artem; the only bits missing here are exofs and ext4 ones. As far as I understand, those are going via the exofs and ext4 trees resp.; once they are in, we can put ->write_super() to the rest, along with the thread calling it. - preparatory bits from unionmount series (from dhowells). - assorted cleanups and fixes all over the place, as usual. This is not the last pile for this cycle; there's at least jlayton's ESTALE work and fsfreeze series (the latter - in dire need of fixes, so I'm not sure it'll make the cut this cycle). I'll probably throw symlink/hardlink restrictions stuff from Kees into the next pile, too. Plus there's a lot of misc patches I hadn't thrown into that one - it's large enough as it is..." 
* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (127 commits) ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file() btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file() switch dentry_open() to struct path, make it grab references itself spufs: shift dget/mntget towards dentry_open() zoran: don't bother with struct file * in zoran_map ecryptfs: don't reinvent the wheels, please - use struct completion don't expose I_NEW inodes via dentry->d_inode tidy up namei.c a bit unobfuscate follow_up() a bit ext3: pass custom EOF to generic_file_llseek_size() ext4: use core vfs llseek code for dir seeks vfs: allow custom EOF in generic_file_llseek code vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes vfs: Remove unnecessary flushing of block devices vfs: Make sys_sync writeout also block device inodes vfs: Create function for iterating over block devices vfs: Reorder operations during sys_sync quota: Move quota syncing to ->sync_fs method quota: Split dquot_quota_sync() to writeback and cache flushing part vfs: Move noop_backing_dev_info check from sync into writeback ...
516 lines
12 KiB
C
516 lines
12 KiB
C
/*
|
|
* linux/fs/ext4/ioctl.c
|
|
*
|
|
* Copyright (C) 1993, 1994, 1995
|
|
* Remy Card (card@masi.ibp.fr)
|
|
* Laboratoire MASI - Institut Blaise Pascal
|
|
* Universite Pierre et Marie Curie (Paris VI)
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/jbd2.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/time.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/file.h>
|
|
#include <asm/uaccess.h>
|
|
#include "ext4_jbd2.h"
|
|
#include "ext4.h"
|
|
|
|
/* Largest value representable in 32 bits (2^32 - 1), as an unsigned long long. */
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
|
|
|
|
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct inode *inode = filp->f_dentry->d_inode;
|
|
struct super_block *sb = inode->i_sb;
|
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
unsigned int flags;
|
|
|
|
ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
|
|
|
|
switch (cmd) {
|
|
case EXT4_IOC_GETFLAGS:
|
|
ext4_get_inode_flags(ei);
|
|
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
|
|
return put_user(flags, (int __user *) arg);
|
|
case EXT4_IOC_SETFLAGS: {
|
|
handle_t *handle = NULL;
|
|
int err, migrate = 0;
|
|
struct ext4_iloc iloc;
|
|
unsigned int oldflags, mask, i;
|
|
unsigned int jflag;
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
if (get_user(flags, (int __user *) arg))
|
|
return -EFAULT;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
|
|
flags = ext4_mask_flags(inode->i_mode, flags);
|
|
|
|
err = -EPERM;
|
|
mutex_lock(&inode->i_mutex);
|
|
/* Is it quota file? Do not allow user to mess with it */
|
|
if (IS_NOQUOTA(inode))
|
|
goto flags_out;
|
|
|
|
oldflags = ei->i_flags;
|
|
|
|
/* The JOURNAL_DATA flag is modifiable only by root */
|
|
jflag = flags & EXT4_JOURNAL_DATA_FL;
|
|
|
|
/*
|
|
* The IMMUTABLE and APPEND_ONLY flags can only be changed by
|
|
* the relevant capability.
|
|
*
|
|
* This test looks nicer. Thanks to Pauline Middelink
|
|
*/
|
|
if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
|
|
if (!capable(CAP_LINUX_IMMUTABLE))
|
|
goto flags_out;
|
|
}
|
|
|
|
/*
|
|
* The JOURNAL_DATA flag can only be changed by
|
|
* the relevant capability.
|
|
*/
|
|
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
|
|
if (!capable(CAP_SYS_RESOURCE))
|
|
goto flags_out;
|
|
}
|
|
if (oldflags & EXT4_EXTENTS_FL) {
|
|
/* We don't support clearning extent flags */
|
|
if (!(flags & EXT4_EXTENTS_FL)) {
|
|
err = -EOPNOTSUPP;
|
|
goto flags_out;
|
|
}
|
|
} else if (flags & EXT4_EXTENTS_FL) {
|
|
/* migrate the file */
|
|
migrate = 1;
|
|
flags &= ~EXT4_EXTENTS_FL;
|
|
}
|
|
|
|
if (flags & EXT4_EOFBLOCKS_FL) {
|
|
/* we don't support adding EOFBLOCKS flag */
|
|
if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
|
|
err = -EOPNOTSUPP;
|
|
goto flags_out;
|
|
}
|
|
} else if (oldflags & EXT4_EOFBLOCKS_FL)
|
|
ext4_truncate(inode);
|
|
|
|
handle = ext4_journal_start(inode, 1);
|
|
if (IS_ERR(handle)) {
|
|
err = PTR_ERR(handle);
|
|
goto flags_out;
|
|
}
|
|
if (IS_SYNC(inode))
|
|
ext4_handle_sync(handle);
|
|
err = ext4_reserve_inode_write(handle, inode, &iloc);
|
|
if (err)
|
|
goto flags_err;
|
|
|
|
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
|
|
if (!(mask & EXT4_FL_USER_MODIFIABLE))
|
|
continue;
|
|
if (mask & flags)
|
|
ext4_set_inode_flag(inode, i);
|
|
else
|
|
ext4_clear_inode_flag(inode, i);
|
|
}
|
|
|
|
ext4_set_inode_flags(inode);
|
|
inode->i_ctime = ext4_current_time(inode);
|
|
|
|
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
|
|
flags_err:
|
|
ext4_journal_stop(handle);
|
|
if (err)
|
|
goto flags_out;
|
|
|
|
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
|
|
err = ext4_change_inode_journal_flag(inode, jflag);
|
|
if (err)
|
|
goto flags_out;
|
|
if (migrate)
|
|
err = ext4_ext_migrate(inode);
|
|
flags_out:
|
|
mutex_unlock(&inode->i_mutex);
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
case EXT4_IOC_GETVERSION:
|
|
case EXT4_IOC_GETVERSION_OLD:
|
|
return put_user(inode->i_generation, (int __user *) arg);
|
|
case EXT4_IOC_SETVERSION:
|
|
case EXT4_IOC_SETVERSION_OLD: {
|
|
handle_t *handle;
|
|
struct ext4_iloc iloc;
|
|
__u32 generation;
|
|
int err;
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EPERM;
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
|
|
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
|
|
ext4_warning(sb, "Setting inode version is not "
|
|
"supported with metadata_csum enabled.");
|
|
return -ENOTTY;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
if (get_user(generation, (int __user *) arg)) {
|
|
err = -EFAULT;
|
|
goto setversion_out;
|
|
}
|
|
|
|
mutex_lock(&inode->i_mutex);
|
|
handle = ext4_journal_start(inode, 1);
|
|
if (IS_ERR(handle)) {
|
|
err = PTR_ERR(handle);
|
|
goto unlock_out;
|
|
}
|
|
err = ext4_reserve_inode_write(handle, inode, &iloc);
|
|
if (err == 0) {
|
|
inode->i_ctime = ext4_current_time(inode);
|
|
inode->i_generation = generation;
|
|
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
|
|
}
|
|
ext4_journal_stop(handle);
|
|
|
|
unlock_out:
|
|
mutex_unlock(&inode->i_mutex);
|
|
setversion_out:
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
case EXT4_IOC_GROUP_EXTEND: {
|
|
ext4_fsblk_t n_blocks_count;
|
|
int err, err2=0;
|
|
|
|
err = ext4_resize_begin(sb);
|
|
if (err)
|
|
return err;
|
|
|
|
if (get_user(n_blocks_count, (__u32 __user *)arg)) {
|
|
err = -EFAULT;
|
|
goto group_extend_out;
|
|
}
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not supported with bigalloc");
|
|
err = -EOPNOTSUPP;
|
|
goto group_extend_out;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto group_extend_out;
|
|
|
|
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
|
|
if (EXT4_SB(sb)->s_journal) {
|
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
|
}
|
|
if (err == 0)
|
|
err = err2;
|
|
mnt_drop_write_file(filp);
|
|
group_extend_out:
|
|
ext4_resize_end(sb);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_MOVE_EXT: {
|
|
struct move_extent me;
|
|
struct file *donor_filp;
|
|
int err;
|
|
|
|
if (!(filp->f_mode & FMODE_READ) ||
|
|
!(filp->f_mode & FMODE_WRITE))
|
|
return -EBADF;
|
|
|
|
if (copy_from_user(&me,
|
|
(struct move_extent __user *)arg, sizeof(me)))
|
|
return -EFAULT;
|
|
me.moved_len = 0;
|
|
|
|
donor_filp = fget(me.donor_fd);
|
|
if (!donor_filp)
|
|
return -EBADF;
|
|
|
|
if (!(donor_filp->f_mode & FMODE_WRITE)) {
|
|
err = -EBADF;
|
|
goto mext_out;
|
|
}
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online defrag not supported with bigalloc");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto mext_out;
|
|
|
|
err = ext4_move_extents(filp, donor_filp, me.orig_start,
|
|
me.donor_start, me.len, &me.moved_len);
|
|
mnt_drop_write_file(filp);
|
|
|
|
if (copy_to_user((struct move_extent __user *)arg,
|
|
&me, sizeof(me)))
|
|
err = -EFAULT;
|
|
mext_out:
|
|
fput(donor_filp);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_GROUP_ADD: {
|
|
struct ext4_new_group_data input;
|
|
int err, err2=0;
|
|
|
|
err = ext4_resize_begin(sb);
|
|
if (err)
|
|
return err;
|
|
|
|
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
|
|
sizeof(input))) {
|
|
err = -EFAULT;
|
|
goto group_add_out;
|
|
}
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not supported with bigalloc");
|
|
err = -EOPNOTSUPP;
|
|
goto group_add_out;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto group_add_out;
|
|
|
|
err = ext4_group_add(sb, &input);
|
|
if (EXT4_SB(sb)->s_journal) {
|
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
|
}
|
|
if (err == 0)
|
|
err = err2;
|
|
mnt_drop_write_file(filp);
|
|
group_add_out:
|
|
ext4_resize_end(sb);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_MIGRATE:
|
|
{
|
|
int err;
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
/*
|
|
* inode_mutex prevent write and truncate on the file.
|
|
* Read still goes through. We take i_data_sem in
|
|
* ext4_ext_swap_inode_data before we switch the
|
|
* inode format to prevent read.
|
|
*/
|
|
mutex_lock(&(inode->i_mutex));
|
|
err = ext4_ext_migrate(inode);
|
|
mutex_unlock(&(inode->i_mutex));
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_ALLOC_DA_BLKS:
|
|
{
|
|
int err;
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
err = ext4_alloc_da_blocks(inode);
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_RESIZE_FS: {
|
|
ext4_fsblk_t n_blocks_count;
|
|
struct super_block *sb = inode->i_sb;
|
|
int err = 0, err2 = 0;
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not (yet) supported with bigalloc");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_INCOMPAT_META_BG)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not (yet) supported with meta_bg");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
|
|
sizeof(__u64))) {
|
|
return -EFAULT;
|
|
}
|
|
|
|
if (n_blocks_count > MAX_32_NUM &&
|
|
!EXT4_HAS_INCOMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_INCOMPAT_64BIT)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"File system only supports 32-bit block numbers");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
err = ext4_resize_begin(sb);
|
|
if (err)
|
|
return err;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto resizefs_out;
|
|
|
|
err = ext4_resize_fs(sb, n_blocks_count);
|
|
if (EXT4_SB(sb)->s_journal) {
|
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
|
}
|
|
if (err == 0)
|
|
err = err2;
|
|
mnt_drop_write_file(filp);
|
|
resizefs_out:
|
|
ext4_resize_end(sb);
|
|
return err;
|
|
}
|
|
|
|
case FITRIM:
|
|
{
|
|
struct request_queue *q = bdev_get_queue(sb->s_bdev);
|
|
struct fstrim_range range;
|
|
int ret = 0;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
if (!blk_queue_discard(q))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"FITRIM not supported with bigalloc");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
|
|
sizeof(range)))
|
|
return -EFAULT;
|
|
|
|
range.minlen = max((unsigned int)range.minlen,
|
|
q->limits.discard_granularity);
|
|
ret = ext4_trim_fs(sb, &range);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
if (copy_to_user((struct fstrim_range __user *)arg, &range,
|
|
sizeof(range)))
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
}
|
|
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
/* These are just misnamed, they actually get/put from/to user an int */
|
|
switch (cmd) {
|
|
case EXT4_IOC32_GETFLAGS:
|
|
cmd = EXT4_IOC_GETFLAGS;
|
|
break;
|
|
case EXT4_IOC32_SETFLAGS:
|
|
cmd = EXT4_IOC_SETFLAGS;
|
|
break;
|
|
case EXT4_IOC32_GETVERSION:
|
|
cmd = EXT4_IOC_GETVERSION;
|
|
break;
|
|
case EXT4_IOC32_SETVERSION:
|
|
cmd = EXT4_IOC_SETVERSION;
|
|
break;
|
|
case EXT4_IOC32_GROUP_EXTEND:
|
|
cmd = EXT4_IOC_GROUP_EXTEND;
|
|
break;
|
|
case EXT4_IOC32_GETVERSION_OLD:
|
|
cmd = EXT4_IOC_GETVERSION_OLD;
|
|
break;
|
|
case EXT4_IOC32_SETVERSION_OLD:
|
|
cmd = EXT4_IOC_SETVERSION_OLD;
|
|
break;
|
|
case EXT4_IOC32_GETRSVSZ:
|
|
cmd = EXT4_IOC_GETRSVSZ;
|
|
break;
|
|
case EXT4_IOC32_SETRSVSZ:
|
|
cmd = EXT4_IOC_SETRSVSZ;
|
|
break;
|
|
case EXT4_IOC32_GROUP_ADD: {
|
|
struct compat_ext4_new_group_input __user *uinput;
|
|
struct ext4_new_group_input input;
|
|
mm_segment_t old_fs;
|
|
int err;
|
|
|
|
uinput = compat_ptr(arg);
|
|
err = get_user(input.group, &uinput->group);
|
|
err |= get_user(input.block_bitmap, &uinput->block_bitmap);
|
|
err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
|
|
err |= get_user(input.inode_table, &uinput->inode_table);
|
|
err |= get_user(input.blocks_count, &uinput->blocks_count);
|
|
err |= get_user(input.reserved_blocks,
|
|
&uinput->reserved_blocks);
|
|
if (err)
|
|
return -EFAULT;
|
|
old_fs = get_fs();
|
|
set_fs(KERNEL_DS);
|
|
err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
|
|
(unsigned long) &input);
|
|
set_fs(old_fs);
|
|
return err;
|
|
}
|
|
case EXT4_IOC_MOVE_EXT:
|
|
case FITRIM:
|
|
case EXT4_IOC_RESIZE_FS:
|
|
break;
|
|
default:
|
|
return -ENOIOCTLCMD;
|
|
}
|
|
return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
|
|
}
|
|
#endif
|