From 3fdd94acd50d607cf6a971455307e711fd8ee16e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 6 Jan 2019 15:05:40 +0100 Subject: [PATCH 01/26] binderfs: remove wrong kern_mount() call The binderfs filesystem never needs to be mounted by the kernel itself. This is conceptually wrong and should never have been done in the first place. Fixes: 3ad20fe393b ("binder: implement binderfs") Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 7496b10532aa..6f68d6217eb3 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -40,8 +40,6 @@ #define INTSTRLEN 21 #define BINDERFS_MAX_MINOR (1U << MINORBITS) -static struct vfsmount *binderfs_mnt; - static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); @@ -530,14 +528,6 @@ static int __init init_binderfs(void) return ret; } - binderfs_mnt = kern_mount(&binder_fs_type); - if (IS_ERR(binderfs_mnt)) { - ret = PTR_ERR(binderfs_mnt); - binderfs_mnt = NULL; - unregister_filesystem(&binder_fs_type); - unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); - } - return ret; } From b6c770d7c9dc7185b17d53a9d5ca1278c182d6fa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 6 Jan 2019 15:05:41 +0100 Subject: [PATCH 02/26] binderfs: make each binderfs mount a new instance When currently mounting binderfs in the same ipc namespace twice: mount -t binder binder /A mount -t binder binder /B then the binderfs instances mounted on /A and /B will be the same, i.e. they will have the same superblock. This was the first approach that seemed reasonable. However, this leads to some problems and inconsistencies: /* private binderfs instance in same ipc namespace */ There is no way for a user to request a private binderfs instance in the same ipc namespace. This request has been made in a private mail to me by two independent people. /* bind-mounts */ If users want the same binderfs instance to appear in multiple places they can use bind mounts. So there is no value in having a request for a new binderfs mount giving them the same instance. /* unexpected behavior */ It's surprising that request to mount binderfs is not giving the user a new instance like tmpfs, devpts, ramfs, and others do. /* past mistakes */ Other pseudo-filesystems once made the same mistakes of giving back the same superblock when actually requesting a new mount (cf. devpts's deprecated "newinstance" option). We should not make the same mistake. Once we've committed to always giving back the same superblock in the same IPC namespace with the next kernel release we will not be able to make that change so better to do it now. /* kdbusfs */ It was pointed out to me that kdbusfs - which is conceptually closely related to binderfs - also allowed users to get a private kdbusfs instance in the same IPC namespace by making each mount of kdbusfs a separate instance. I think that makes a lot of sense. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 41 ++------------------------------------ 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 6f68d6217eb3..4990d65d4850 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -379,7 +379,7 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) struct binderfs_info *info; int ret = -ENOMEM; struct inode *inode = NULL; - struct ipc_namespace *ipc_ns = sb->s_fs_info; + struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; get_ipc_ns(ipc_ns); @@ -450,48 +450,11 @@ err_without_dentry: return ret; } -static int binderfs_test_super(struct super_block *sb, void *data) -{ - struct binderfs_info *info = sb->s_fs_info; - - if (info) - return info->ipc_ns == data; - - return 0; -} - -static int binderfs_set_super(struct super_block *sb, void *data) -{ - sb->s_fs_info = data; - return set_anon_super(sb, NULL); -} - static struct dentry *binderfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct super_block *sb; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - - if (!ns_capable(ipc_ns->user_ns, CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - sb = sget_userns(fs_type, binderfs_test_super, binderfs_set_super, - flags, ipc_ns->user_ns, ipc_ns); - if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (!sb->s_root) { - int ret = binderfs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); - if (ret) { - deactivate_locked_super(sb); - return ERR_PTR(ret); - } - - sb->s_flags |= SB_ACTIVE; - } - - return dget(sb->s_root); + return mount_nodev(fs_type, flags, data, binderfs_fill_super); } static void binderfs_kill_super(struct super_block *sb) From ba50bf1ce9a51fc97db58b96d01306aa70bc3979 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 17 Dec 2018 20:16:09 +0000 Subject: [PATCH 03/26] Drivers: hv: vmbus: Check for ring when getting debug info fc96df16a1ce is good and can already fix the "return stack garbage" issue, but let's also improve hv_ringbuffer_get_debuginfo(), which would silently return stack garbage, if people forget to check channel->state or ring_info->ring_buffer, when using the function in the future. Having an error check in the function would eliminate the potential risk. Add a Fixes tag to indicate the patch depdendency. Fixes: fc96df16a1ce ("Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels") Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Stephen Hemminger Signed-off-by: Dexuan Cui Signed-off-by: Sasha Levin --- drivers/hv/ring_buffer.c | 29 +++++++------ drivers/hv/vmbus_drv.c | 91 +++++++++++++++++++++++++++------------- include/linux/hyperv.h | 5 ++- 3 files changed, 78 insertions(+), 47 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 64d0c85d5161..1f1a55e07733 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -164,26 +164,25 @@ hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, } /* Get various debug metrics for the specified ring buffer. */ -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info) +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info) { u32 bytes_avail_towrite; u32 bytes_avail_toread; - if (ring_info->ring_buffer) { - hv_get_ringbuffer_availbytes(ring_info, - &bytes_avail_toread, - &bytes_avail_towrite); + if (!ring_info->ring_buffer) + return -EINVAL; - debug_info->bytes_avail_toread = bytes_avail_toread; - debug_info->bytes_avail_towrite = bytes_avail_towrite; - debug_info->current_read_index = - ring_info->ring_buffer->read_index; - debug_info->current_write_index = - ring_info->ring_buffer->write_index; - debug_info->current_interrupt_mask = - ring_info->ring_buffer->interrupt_mask; - } + hv_get_ringbuffer_availbytes(ring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + debug_info->bytes_avail_toread = bytes_avail_toread; + debug_info->bytes_avail_towrite = bytes_avail_towrite; + debug_info->current_read_index = ring_info->ring_buffer->read_index; + debug_info->current_write_index = ring_info->ring_buffer->write_index; + debug_info->current_interrupt_mask + = ring_info->ring_buffer->interrupt_mask; + return 0; } EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d0ff65675292..403fee01572c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -313,12 +313,16 @@ static ssize_t out_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } static DEVICE_ATTR_RO(out_intr_mask); @@ -328,12 +332,15 @@ static ssize_t out_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -344,12 +351,15 @@ static ssize_t out_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); @@ -360,12 +370,15 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } static DEVICE_ATTR_RO(out_read_bytes_avail); @@ -376,12 +389,15 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(out_write_bytes_avail); @@ -391,12 +407,15 @@ static ssize_t in_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } static DEVICE_ATTR_RO(in_intr_mask); @@ -406,12 +425,15 @@ static ssize_t in_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_read_index); } static DEVICE_ATTR_RO(in_read_index); @@ -421,12 +443,15 @@ static ssize_t in_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_write_index); } static DEVICE_ATTR_RO(in_write_index); @@ -437,12 +462,15 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } static DEVICE_ATTR_RO(in_read_bytes_avail); @@ -453,12 +481,15 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(in_write_bytes_avail); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f0885cc01db6..dcb6977afce9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1159,8 +1159,9 @@ struct hv_ring_buffer_debug_info { u32 bytes_avail_towrite; }; -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info); + +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); /* Vmbus interface */ #define vmbus_driver_register(driver) \ From da8ced360ca8ad72d8f41f5c8fcd5b0e63e1555f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 4 Jan 2019 15:19:42 +0100 Subject: [PATCH 04/26] hv_balloon: avoid touching uninitialized struct page during tail onlining Hyper-V memory hotplug protocol has 2M granularity and in Linux x86 we use 128M. To deal with it we implement partial section onlining by registering custom page onlining callback (hv_online_page()). Later, when more memory arrives we try to online the 'tail' (see hv_bring_pgs_online()). It was found that in some cases this 'tail' onlining causes issues: BUG: Bad page state in process kworker/0:2 pfn:109e3a page:ffffe08344278e80 count:0 mapcount:1 mapping:0000000000000000 index:0x0 flags: 0xfffff80000000() raw: 000fffff80000000 dead000000000100 dead000000000200 0000000000000000 raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 page dumped because: nonzero mapcount ... Workqueue: events hot_add_req [hv_balloon] Call Trace: dump_stack+0x5c/0x80 bad_page.cold.112+0x7f/0xb2 free_pcppages_bulk+0x4b8/0x690 free_unref_page+0x54/0x70 hv_page_online_one+0x5c/0x80 [hv_balloon] hot_add_req.cold.24+0x182/0x835 [hv_balloon] ... Turns out that we now have deferred struct page initialization for memory hotplug so e.g. memory_block_action() in drivers/base/memory.c does pages_correctly_probed() check and in that check it avoids inspecting struct pages and checks sections instead. But in Hyper-V balloon driver we do PageReserved(pfn_to_page()) check and this is now wrong. Switch to checking online_section_nr() instead. Signed-off-by: Vitaly Kuznetsov Cc: stable@kernel.org Signed-off-by: Sasha Levin --- drivers/hv/hv_balloon.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 5301fef16c31..7c6349a50ef1 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -888,12 +888,14 @@ static unsigned long handle_pg_range(unsigned long pg_start, pfn_cnt -= pgs_ol; /* * Check if the corresponding memory block is already - * online by checking its last previously backed page. - * In case it is we need to bring rest (which was not - * backed previously) online too. + * online. It is possible to observe struct pages still + * being uninitialized here so check section instead. + * In case the section is online we need to bring the + * rest of pfns (which were not backed previously) + * online too. */ if (start_pfn > has->start_pfn && - !PageReserved(pfn_to_page(start_pfn - 1))) + online_section_nr(pfn_to_section_nr(start_pfn))) hv_bring_pgs_online(has, start_pfn, pgs_ol); } From b5679cebf780c6f1c2451a73bf1842a4409840e7 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 9 Jan 2019 20:56:06 +0000 Subject: [PATCH 05/26] vmbus: fix subchannel removal The changes to split ring allocation from open/close, broke the cleanup of subchannels. This resulted in problems using uio on network devices because the subchannel was left behind when the network device was unbound. The cause was in the disconnect logic which used list splice to move the subchannel list into a local variable. This won't work because the subchannel list is needed later during the process of the rescind messages (relid2channel). The fix is to just leave the subchannel list in place which is what the original code did. The list is cleaned up later when the host rescind is processed. Without the fix, we have a lot of "hang" issues in netvsc when we try to change the NIC's MTU, set the number of channels, etc. Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open") Cc: stable@vger.kernel.org Signed-off-by: Stephen Hemminger Signed-off-by: Dexuan Cui Signed-off-by: Sasha Levin --- drivers/hv/channel.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index ce0ba2062723..bea4c9850247 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -701,19 +701,12 @@ static int vmbus_close_internal(struct vmbus_channel *channel) int vmbus_disconnect_ring(struct vmbus_channel *channel) { struct vmbus_channel *cur_channel, *tmp; - unsigned long flags; - LIST_HEAD(list); int ret; if (channel->primary_channel != NULL) return -EINVAL; - /* Snapshot the list of subchannels */ - spin_lock_irqsave(&channel->lock, flags); - list_splice_init(&channel->sc_list, &list); - spin_unlock_irqrestore(&channel->lock, flags); - - list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) { + list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) { if (cur_channel->rescind) wait_for_completion(&cur_channel->rescind_event); From 849d540ddfcd4f232f3b2cf40a2e07eccbd6212c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Jan 2019 12:32:18 +0100 Subject: [PATCH 06/26] binderfs: implement "max" mount option Since binderfs can be mounted by userns root in non-initial user namespaces some precautions are in order. First, a way to set a maximum on the number of binder devices that can be allocated per binderfs instance and second, a way to reserve a reasonable chunk of binderfs devices for the initial ipc namespace. A first approach as seen in [1] used sysctls similiar to devpts but was shown to be flawed (cf. [2] and [3]) since some aspects were unneeded. This is an alternative approach which avoids sysctls completely and instead switches to a single mount option. Starting with this commit binderfs instances can be mounted with a limit on the number of binder devices that can be allocated. The max= mount option serves as a per-instance limit. If max= is set then only number of binder devices can be allocated in this binderfs instance. This allows to safely bind-mount binderfs instances into unprivileged user namespaces since userns root in a non-initial user namespace cannot change the mount option as long as it does not own the mount namespace the binderfs mount was created in and hence cannot drain the host of minor device numbers [1]: https://lore.kernel.org/lkml/20181221133909.18794-1-christian@brauner.io/ [2]; https://lore.kernel.org/lkml/20181221163316.GA8517@kroah.com/ [3]: https://lore.kernel.org/lkml/CAHRSSEx+gDVW4fKKK8oZNAir9G5icJLyodO8hykv3O0O1jt2FQ@mail.gmail.com/ [4]: https://lore.kernel.org/lkml/20181221192044.5yvfnuri7gdop4rs@brauner.io/ Cc: Todd Kjos Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 104 ++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 6 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 4990d65d4850..89788969bc04 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,24 @@ static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + */ +struct binderfs_mount_opts { + int max; +}; + +enum { + Opt_max, + Opt_err +}; + +static const match_table_t tokens = { + { Opt_max, "max=%d" }, + { Opt_err, NULL } +}; + /** * binderfs_info - information about a binderfs mount * @ipc_ns: The ipc namespace the binderfs mount belongs to. @@ -53,13 +72,16 @@ static DEFINE_IDA(binderfs_minors); * created. * @root_gid: gid that needs to be used when a new binder device is * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. */ struct binderfs_info { struct ipc_namespace *ipc_ns; struct dentry *control_dentry; kuid_t root_uid; kgid_t root_gid; - + struct binderfs_mount_opts mount_opts; + int device_count; }; static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) @@ -108,10 +130,17 @@ static int binderfs_binder_device_create(struct inode *ref_inode, /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL); - mutex_unlock(&binderfs_minors_mutex); - if (minor < 0) + if (++info->device_count <= info->mount_opts.max) + minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, + GFP_KERNEL); + else + minor = -ENOSPC; + if (minor < 0) { + --info->device_count; + mutex_unlock(&binderfs_minors_mutex); return minor; + } + mutex_unlock(&binderfs_minors_mutex); ret = -ENOMEM; device = kzalloc(sizeof(*device), GFP_KERNEL); @@ -185,6 +214,7 @@ err: kfree(name); kfree(device); mutex_lock(&binderfs_minors_mutex); + --info->device_count; ida_free(&binderfs_minors, minor); mutex_unlock(&binderfs_minors_mutex); iput(inode); @@ -230,6 +260,7 @@ static long binder_ctl_ioctl(struct file *file, unsigned int cmd, static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; + struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); @@ -237,6 +268,7 @@ static void binderfs_evict_inode(struct inode *inode) return; mutex_lock(&binderfs_minors_mutex); + --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); @@ -244,9 +276,65 @@ static void binderfs_evict_inode(struct inode *inode) kfree(device); } +/** + * binderfs_parse_mount_opts - parse binderfs mount options + * @data: options to set (can be NULL in which case defaults are used) + */ +static int binderfs_parse_mount_opts(char *data, + struct binderfs_mount_opts *opts) +{ + char *p; + opts->max = BINDERFS_MAX_MINOR; + + while ((p = strsep(&data, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + int max_devices; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_max: + if (match_int(&args[0], &max_devices) || + (max_devices < 0 || + (max_devices > BINDERFS_MAX_MINOR))) + return -EINVAL; + + opts->max = max_devices; + break; + default: + pr_err("Invalid mount options\n"); + return -EINVAL; + } + } + + return 0; +} + +static int binderfs_remount(struct super_block *sb, int *flags, char *data) +{ + struct binderfs_info *info = sb->s_fs_info; + return binderfs_parse_mount_opts(data, &info->mount_opts); +} + +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) +{ + struct binderfs_info *info; + + info = root->d_sb->s_fs_info; + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) + seq_printf(seq, ",max=%d", info->mount_opts.max); + + return 0; +} + static const struct super_operations binderfs_super_ops = { - .statfs = simple_statfs, - .evict_inode = binderfs_evict_inode, + .evict_inode = binderfs_evict_inode, + .remount_fs = binderfs_remount, + .show_options = binderfs_show_mount_opts, + .statfs = simple_statfs, }; static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -407,6 +495,10 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) if (!info) goto err_without_dentry; + ret = binderfs_parse_mount_opts(data, &info->mount_opts); + if (ret) + goto err_without_dentry; + info->ipc_ns = ipc_ns; info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) From c13295ad219d8bb0e47942d4cfc8251de449a67e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 11 Jan 2019 00:25:41 +0100 Subject: [PATCH 07/26] binderfs: rename header to binderfs.h It doesn't make sense to call the header binder_ctl.h when its sole existence is tied to binderfs. So give it a sensible name. Users will far more easily remember binderfs.h than binder_ctl.h. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 2 +- include/uapi/linux/android/{binder_ctl.h => binderfs.h} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename include/uapi/linux/android/{binder_ctl.h => binderfs.h} (100%) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 89788969bc04..f6341893b5ba 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "binder_internal.h" diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binderfs.h similarity index 100% rename from include/uapi/linux/android/binder_ctl.h rename to include/uapi/linux/android/binderfs.h From 36bdf3cae09df891b191f3955c8e54a2e05d67d0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 11 Jan 2019 11:19:40 +0100 Subject: [PATCH 08/26] binderfs: reserve devices for initial mount The binderfs instance in the initial ipc namespace will always have a reserve of 4 binder devices unless explicitly capped by specifying a lower value via the "max" mount option. This ensures when binder devices are removed (on accident or on purpose) they can always be recreated without risking that all minor numbers have already been used up. Cc: Todd Kjos Cc: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index f6341893b5ba..ad3ad2f7f9f4 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -40,6 +40,8 @@ #define INODE_OFFSET 3 #define INTSTRLEN 21 #define BINDERFS_MAX_MINOR (1U << MINORBITS) +/* Ensure that the initial ipc namespace always has devices available. */ +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); @@ -127,11 +129,14 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; + bool use_reserve = (info->ipc_ns == &init_ipc_ns); /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); if (++info->device_count <= info->mount_opts.max) - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, GFP_KERNEL); else minor = -ENOSPC; From 7fefaadd6a962987baac50e7b3c4c3d5ef9b55c6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 12 Jan 2019 01:06:03 +0100 Subject: [PATCH 09/26] binderfs: handle !CONFIG_IPC_NS builds kbuild reported a build faile in [1]. This is triggered when CONFIG_IPC_NS is not set. So let's make the use of init_ipc_ns conditional on CONFIG_IPC_NS being set. [1]: https://lists.01.org/pipermail/kbuild-all/2019-January/056903.html Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index ad3ad2f7f9f4..9518e2e7da05 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -129,7 +129,11 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); From 82e59cbe5fdc0d521f9037861af21af6d5814afd Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 13 Jan 2019 14:24:46 +0200 Subject: [PATCH 10/26] mei: dma: silent the reject message Not all FW versions support DMA on their first release, hence it is normal behavior to receive a reject response upon DMA setup request. In order to prevent confusion, the DMA setup reject message is printed only in debug level. Cc: #v5.0+ Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hbm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index 78c26cebf5d4..8f7616557c97 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -1187,9 +1187,15 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) dma_setup_res = (struct hbm_dma_setup_response *)mei_msg; if (dma_setup_res->status) { - dev_info(dev->dev, "hbm: dma setup response: failure = %d %s\n", - dma_setup_res->status, - mei_hbm_status_str(dma_setup_res->status)); + u8 status = dma_setup_res->status; + + if (status == MEI_HBMS_NOT_ALLOWED) { + dev_dbg(dev->dev, "hbm: dma setup not allowed\n"); + } else { + dev_info(dev->dev, "hbm: dma setup response: failure = %d %s\n", + status, + mei_hbm_status_str(status)); + } dev->hbm_f_dr_supported = 0; mei_dmam_ring_free(dev); } From 173436ba800d01178a8b19e5de4a8cb02c0db760 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 13 Jan 2019 14:24:47 +0200 Subject: [PATCH 11/26] mei: me: mark LBG devices as having dma support The LBG server platform sports DMA support. Cc: #v5.0+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 73ace2d59dea..c2bf3e99955e 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -88,7 +88,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, From f7ee8ead151f9d0b8dac6ab6c3ff49bbe809c564 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 13 Jan 2019 14:24:48 +0200 Subject: [PATCH 12/26] mei: me: add denverton innovation engine device IDs Add the Denverton innovation engine (IE) device ids. The IE is an ME-like device which provides HW security offloading. Cc: Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index e4b10b2d1a08..23739a60517f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -127,6 +127,8 @@ #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ + #define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index c2bf3e99955e..e89497f858ae 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -93,6 +93,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, From 7e7ca7744a539f1a172e3b81c29d000787e3d774 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 16 Jan 2019 10:42:59 +0000 Subject: [PATCH 13/26] binderfs: fix error return code in binderfs_fill_super() Fix to return a negative error code -ENOMEM from the new_inode() and d_make_root() error handling cases instead of 0, as done elsewhere in this function. Fixes: 849d540ddfcd ("binderfs: implement "max" mount option") Signed-off-by: Wei Yongjun Reviewed-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 9518e2e7da05..e4ff4c3fa371 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -518,6 +518,7 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = info; + ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto err_without_dentry; From e25df7812c91f62581301f9a7ac102acf92e4937 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 16 Jan 2019 10:46:16 -0600 Subject: [PATCH 14/26] misc: ibmvsm: Fix potential NULL pointer dereference There is a potential NULL pointer dereference in case kzalloc() fails and returns NULL. Fix this by adding a NULL check on *session* Also, update the function header with information about the expected return on failure and remove unnecessary variable rc. This issue was detected with the help of Coccinelle. Fixes: 0eca353e7ae7 ("misc: IBM Virtual Management Channel Driver (VMC)") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmvmc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c index b8aaa684c397..2ed23c99f59f 100644 --- a/drivers/misc/ibmvmc.c +++ b/drivers/misc/ibmvmc.c @@ -820,21 +820,24 @@ static int ibmvmc_send_msg(struct crq_server_adapter *adapter, * * Return: * 0 - Success + * Non-zero - Failure */ static int ibmvmc_open(struct inode *inode, struct file *file) { struct ibmvmc_file_session *session; - int rc = 0; pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__, (unsigned long)inode, (unsigned long)file, ibmvmc.state); session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + session->file = file; file->private_data = session; - return rc; + return 0; } /** From 701956d4018e5d5438570e39e8bda47edd32c489 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 9 Jan 2019 13:02:36 -0600 Subject: [PATCH 15/26] char/mwave: fix potential Spectre v1 vulnerability ipcnum is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/char/mwave/mwavedd.c:299 mwave_ioctl() warn: potential spectre issue 'pDrvData->IPCs' [w] (local cap) Fix this by sanitizing ipcnum before using it to index pDrvData->IPCs. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/char/mwave/mwavedd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index b5e3103c1175..e43c876a9223 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "smapi.h" #include "mwavedd.h" #include "3780i.h" @@ -289,6 +290,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" " ipcnum %x entry usIntCount %x\n", @@ -317,6 +320,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" " ipcnum %x, usIntCount %x\n", @@ -383,6 +388,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); mutex_lock(&mwave_mutex); if (pDrvData->IPCs[ipcnum].bIsEnabled == true) { pDrvData->IPCs[ipcnum].bIsEnabled = false; From d8e346eb30372233063236edeb7600b92c92b287 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 11 Jan 2019 13:25:25 +0100 Subject: [PATCH 16/26] misc: pvpanic: fix warning implicit declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building and have fragment CONFIG_NO_IOPORT_MAP enabled then the following warning: ../drivers/misc/pvpanic.c: In function ‘pvpanic_walk_resources’: ../drivers/misc/pvpanic.c:73:10: error: implicit declaration of function ‘ioport_map’; did you mean ‘ioremap’? [-Werror=implicit-function-declaration] base = ioport_map(r.start, resource_size(&r)); ^~~~~~~~~~ Since commmit 5d32a66541c4 ("PCI/ACPI: Allow ACPI to be built without CONFIG_PCI set"), its now possible to have ACPI enabled without haveing PCI enabled. However, the pvpanic driver depends on HAS_IOPORT_MAP or HAVE_IOREMAP_PROT when ACPI is enabled. It was fine until commit 725eba2928ad ("misc/pvpanic: add MMIO support") got added. Rework so that we do a extra check ifdef CONFIG_HAS_IOPORT_MAP. Fixes: 5d32a66541c4 ("PCI/ACPI: Allow ACPI to be built without CONFIG_PCI set") Suggested-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pvpanic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 595ac065b401..95ff7c5a1dfb 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -70,8 +70,12 @@ pvpanic_walk_resources(struct acpi_resource *res, void *context) struct resource r; if (acpi_dev_resource_io(res, &r)) { +#ifdef CONFIG_HAS_IOPORT_MAP base = ioport_map(r.start, resource_size(&r)); return AE_OK; +#else + return AE_ERROR; +#endif } else if (acpi_dev_resource_memory(res, &r)) { base = ioremap(r.start, resource_size(&r)); return AE_OK; From 6fc23b6ed8fa0ba6cc47b2f8756df1199abc3a5c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 12:01:19 +0100 Subject: [PATCH 17/26] binderfs: use correct include guards in header When we switched over from binder_ctl.h to binderfs.h we forgot to change the include guards. It's minor but it's obviously correct. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binderfs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/android/binderfs.h b/include/uapi/linux/android/binderfs.h index 65b2efd1a0a5..b41628b77120 100644 --- a/include/uapi/linux/android/binderfs.h +++ b/include/uapi/linux/android/binderfs.h @@ -4,8 +4,8 @@ * */ -#ifndef _UAPI_LINUX_BINDER_CTL_H -#define _UAPI_LINUX_BINDER_CTL_H +#ifndef _UAPI_LINUX_BINDERFS_H +#define _UAPI_LINUX_BINDERFS_H #include #include @@ -31,5 +31,5 @@ struct binderfs_device { */ #define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) -#endif /* _UAPI_LINUX_BINDER_CTL_H */ +#endif /* _UAPI_LINUX_BINDERFS_H */ From 7d0174065f4903fb0ce0bab3d5047284faa7226d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 12:01:20 +0100 Subject: [PATCH 18/26] binderfs: use __u32 for device numbers We allow more then 255 binderfs binder devices to be created since there are workloads that require more than that. If we use __u8 we'll overflow after 255. So let's use a __u32. Note that there's no released kernel with binderfs out there so this is not a regression. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binderfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/android/binderfs.h b/include/uapi/linux/android/binderfs.h index b41628b77120..87410477aea9 100644 --- a/include/uapi/linux/android/binderfs.h +++ b/include/uapi/linux/android/binderfs.h @@ -22,8 +22,8 @@ */ struct binderfs_device { char name[BINDERFS_MAX_NAME + 1]; - __u8 major; - __u8 minor; + __u32 major; + __u32 minor; }; /** From 7c4d08fc4d5aca073bd4ebecbb9eda5e4d858b71 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:02 +0100 Subject: [PATCH 19/26] binderfs: remove outdated comment The comment stems from an early version of that patchset and is just confusing now. Cc: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index e4ff4c3fa371..898d847f8505 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -373,10 +373,6 @@ static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { - /* - * The control dentry is only ever touched during mount so checking it - * here should not require us to take lock. - */ if (BINDERFS_I(dir)->control_dentry == dentry) return -EPERM; From e98e6fa18636609f14a7f866524950a783cf4fbf Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:03 +0100 Subject: [PATCH 20/26] binderfs: prevent renaming the control dentry - make binderfs control dentry immutable: We don't allow to unlink it since it is crucial for binderfs to be useable but if we allow to rename it we make the unlink trivial to bypass. So prevent renaming too and simply treat the control dentry as immutable. - add is_binderfs_control_device() helper: Take the opportunity and turn the check for the control dentry into a separate helper is_binderfs_control_device() since it's now used in two places. - simplify binderfs_rename(): Instead of hand-rolling our custom version of simple_rename() just dumb the whole function down to first check whether we're trying to rename the control dentry. If we do EPERM the caller and if not call simple_rename(). Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 898d847f8505..e73f9dbee099 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -346,34 +346,26 @@ static const struct super_operations binderfs_super_ops = { .statfs = simple_statfs, }; +static inline bool is_binderfs_control_device(const struct dentry *dentry) +{ + struct binderfs_info *info = dentry->d_sb->s_fs_info; + return info->control_dentry == dentry; +} + static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct inode *inode = d_inode(old_dentry); - - /* binderfs doesn't support directories. */ - if (d_is_dir(old_dentry)) + if (is_binderfs_control_device(old_dentry) || + is_binderfs_control_device(new_dentry)) return -EPERM; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - if (!simple_empty(new_dentry)) - return -ENOTEMPTY; - - if (d_really_is_positive(new_dentry)) - simple_unlink(new_dir, new_dentry); - - old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = - new_dir->i_mtime = inode->i_ctime = current_time(old_dir); - - return 0; + return simple_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { - if (BINDERFS_I(dir)->control_dentry == dentry) + if (is_binderfs_control_device(dentry)) return -EPERM; return simple_unlink(dir, dentry); From 36975fc3e5f241cc4f45df4ab4624d7d5199d9ed Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:04 +0100 Subject: [PATCH 21/26] binderfs: rework binderfs_fill_super() Al pointed out that on binderfs_fill_super() error deactivate_locked_super() will call binderfs_kill_super() so all of the freeing and putting we currently do in binderfs_fill_super() is unnecessary and buggy. Let's simply return errors and let binderfs_fill_super() take care of cleaning up on error. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 41 ++++++++++---------------------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index e73f9dbee099..89a2ee1a02f6 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -461,12 +461,9 @@ static const struct inode_operations binderfs_dir_inode_operations = { static int binderfs_fill_super(struct super_block *sb, void *data, int silent) { + int ret; struct binderfs_info *info; - int ret = -ENOMEM; struct inode *inode = NULL; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - - get_ipc_ns(ipc_ns); sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -488,15 +485,17 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &binderfs_super_ops; sb->s_time_gran = 1; - info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); - if (!info) - goto err_without_dentry; + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!sb->s_fs_info) + return -ENOMEM; + info = sb->s_fs_info; + + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); ret = binderfs_parse_mount_opts(data, &info->mount_opts); if (ret) - goto err_without_dentry; + return ret; - info->ipc_ns = ipc_ns; info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) info->root_gid = GLOBAL_ROOT_GID; @@ -504,12 +503,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) if (!uid_valid(info->root_uid)) info->root_uid = GLOBAL_ROOT_UID; - sb->s_fs_info = info; - - ret = -ENOMEM; inode = new_inode(sb); if (!inode) - goto err_without_dentry; + return -ENOMEM; inode->i_ino = FIRST_INODE; inode->i_fop = &simple_dir_operations; @@ -520,24 +516,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = d_make_root(inode); if (!sb->s_root) - goto err_without_dentry; + return -ENOMEM; - ret = binderfs_binder_ctl_create(sb); - if (ret) - goto err_with_dentry; - - return 0; - -err_with_dentry: - dput(sb->s_root); - sb->s_root = NULL; - -err_without_dentry: - put_ipc_ns(ipc_ns); - iput(inode); - kfree(info); - - return ret; + return binderfs_binder_ctl_create(sb); } static struct dentry *binderfs_mount(struct file_system_type *fs_type, From 01b3f1fc568352a1ffdcd3ee82a0297f16cc9bd9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:05 +0100 Subject: [PATCH 22/26] binderfs: rework binderfs_binder_device_create() - switch from d_alloc_name() + d_lookup() to lookup_one_len(): Instead of using d_alloc_name() and then doing a d_lookup() with the allocated dentry to find whether a device with the name we're trying to create already exists switch to using lookup_one_len(). The latter will either return the existing dentry or a new one. - switch from kmalloc() + strscpy() to kmemdup(): Use a more idiomatic way to copy the name for the new dentry that userspace gave us. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 39 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 89a2ee1a02f6..1e077498a507 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -106,7 +107,7 @@ bool is_binderfs_device(const struct inode *inode) * @userp: buffer to copy information about new device for userspace to * @req: struct binderfs_device as copied from userspace * - * This function allocated a new binder_device and reserves a new minor + * This function allocates a new binder_device and reserves a new minor * number for it. * Minor numbers are limited and tracked globally in binderfs_minors. The * function will stash a struct binder_device for the specific binder @@ -122,10 +123,10 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct binderfs_device *req) { int minor, ret; - struct dentry *dentry, *dup, *root; + struct dentry *dentry, *root; struct binder_device *device; - size_t name_len = BINDERFS_MAX_NAME + 1; char *name = NULL; + size_t name_len; struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; @@ -168,12 +169,13 @@ static int binderfs_binder_device_create(struct inode *ref_inode, inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; - name = kmalloc(name_len, GFP_KERNEL); + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ + name_len = strlen(req->name); + /* Make sure to include terminating NUL byte */ + name = kmemdup(req->name, name_len + 1, GFP_KERNEL); if (!name) goto err; - strscpy(name, req->name, name_len); - device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -192,24 +194,21 @@ static int binderfs_binder_device_create(struct inode *ref_inode, root = sb->s_root; inode_lock(d_inode(root)); - dentry = d_alloc_name(root, name); - if (!dentry) { + + /* look it up */ + dentry = lookup_one_len(name, root, name_len); + if (IS_ERR(dentry)) { inode_unlock(d_inode(root)); - ret = -ENOMEM; + ret = PTR_ERR(dentry); goto err; } - /* Verify that the name userspace gave us is not already in use. */ - dup = d_lookup(root, &dentry->d_name); - if (dup) { - if (d_really_is_positive(dup)) { - dput(dup); - dput(dentry); - inode_unlock(d_inode(root)); - ret = -EEXIST; - goto err; - } - dput(dup); + if (d_really_is_positive(dentry)) { + /* already exists */ + dput(dentry); + inode_unlock(d_inode(root)); + ret = -EEXIST; + goto err; } inode->i_private = device; From 4198479524aeccaf53c3a4cc73784982535573fa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:06 +0100 Subject: [PATCH 23/26] binderfs: kill_litter_super() before cleanup Al pointed out that first calling kill_litter_super() before cleaning up info is more correct since destroying info doesn't depend on the state of the dentries and inodes. That the opposite remains true is not guaranteed. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 1e077498a507..ba88be172aee 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -531,11 +531,12 @@ static void binderfs_kill_super(struct super_block *sb) { struct binderfs_info *info = sb->s_fs_info; + kill_litter_super(sb); + if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); kfree(info); - kill_litter_super(sb); } static struct file_system_type binder_fs_type = { From 29ef1c8e16aed079ac09989d752e38d412b6e1a8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:07 +0100 Subject: [PATCH 24/26] binderfs: drop lock in binderfs_binder_ctl_create The binderfs_binder_ctl_create() call is a no-op on subsequent calls and the first call is done before we unlock the suberblock. Hence, there is no need to take inode_lock() in there. Let's remove it. Suggested-by: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index ba88be172aee..d537dcdb5d65 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -400,8 +400,6 @@ static int binderfs_binder_ctl_create(struct super_block *sb) if (!device) return -ENOMEM; - inode_lock(d_inode(root)); - /* If we have already created a binder-control node, return. */ if (info->control_dentry) { ret = 0; @@ -440,12 +438,10 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_private = device; info->control_dentry = dentry; d_add(dentry, inode); - inode_unlock(d_inode(root)); return 0; out: - inode_unlock(d_inode(root)); kfree(device); iput(inode); From 01684db950ea2b840531ab9298a8785776b6f6e8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Jan 2019 11:48:08 +0100 Subject: [PATCH 25/26] binderfs: switch from d_add() to d_instantiate() In a previous commit we switched from a d_alloc_name() + d_lookup() combination to setup a new dentry and find potential duplicates to the more idiomatic lookup_one_len(). As far as I understand, this also means we need to switch from d_add() to d_instantiate() since lookup_one_len() will create a new dentry when it doesn't find an existing one and add the new dentry to the hash queues. So we only need to call d_instantiate() to connect the dentry to the inode and turn it into a positive dentry. If we were to use d_add() we sure see stack traces like the following indicating that adding the same dentry twice over the same inode: [ 744.441889] CPU: 4 PID: 2849 Comm: landscape-sysin Not tainted 5.0.0-rc1-brauner-binderfs #243 [ 744.441889] Hardware name: Dell DCS XS24-SC2 /XS24-SC2 , BIOS S59_3C20 04/07/2011 [ 744.441889] RIP: 0010:__d_lookup_rcu+0x76/0x190 [ 744.441889] Code: 89 75 c0 49 c1 e9 20 49 89 fd 45 89 ce 41 83 e6 07 42 8d 04 f5 00 00 00 00 89 45 c8 eb 0c 48 8b 1b 48 85 db 0f 84 81 00 00 00 <44> 8b 63 fc 4c 3b 6b 10 75 ea 48 83 7b 08 00 74 e3 41 83 e4 fe 41 [ 744.441889] RSP: 0018:ffffb8c984e27ad0 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 [ 744.441889] RAX: 0000000000000038 RBX: ffff9407ef770c08 RCX: ffffb8c980011000 [ 744.441889] RDX: ffffb8c984e27b54 RSI: ffffb8c984e27ce0 RDI: ffff9407e6689600 [ 744.441889] RBP: ffffb8c984e27b28 R08: ffffb8c984e27ba4 R09: 0000000000000007 [ 744.441889] R10: ffff9407e5c4f05c R11: 973f3eb9d84a94e5 R12: 0000000000000002 [ 744.441889] R13: ffff9407e6689600 R14: 0000000000000007 R15: 00000007bfef7a13 [ 744.441889] FS: 00007f0db13bb740(0000) GS:ffff9407f3b00000(0000) knlGS:0000000000000000 [ 744.441889] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 744.441889] CR2: 00007f0dacc51024 CR3: 000000032961a000 CR4: 00000000000006e0 [ 744.441889] Call Trace: [ 744.441889] lookup_fast+0x53/0x300 [ 744.441889] walk_component+0x49/0x350 [ 744.441889] ? inode_permission+0x63/0x1a0 [ 744.441889] link_path_walk.part.33+0x1bc/0x5a0 [ 744.441889] ? path_init+0x190/0x310 [ 744.441889] path_lookupat+0x95/0x210 [ 744.441889] filename_lookup+0xb6/0x190 [ 744.441889] ? __check_object_size+0xb8/0x1b0 [ 744.441889] ? strncpy_from_user+0x50/0x1a0 [ 744.441889] user_path_at_empty+0x36/0x40 [ 744.441889] ? user_path_at_empty+0x36/0x40 [ 744.441889] vfs_statx+0x76/0xe0 [ 744.441889] __do_sys_newstat+0x3d/0x70 [ 744.441889] __x64_sys_newstat+0x16/0x20 [ 744.441889] do_syscall_64+0x5a/0x120 [ 744.441889] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 744.441889] RIP: 0033:0x7f0db0ec2775 [ 744.441889] Code: 00 00 00 75 05 48 83 c4 18 c3 e8 26 55 02 00 66 0f 1f 44 00 00 83 ff 01 48 89 f0 77 30 48 89 c7 48 89 d6 b8 04 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 03 f3 c3 90 48 8b 15 e1 b6 2d 00 f7 d8 64 89 [ 744.441889] RSP: 002b:00007ffc36bc9388 EFLAGS: 00000246 ORIG_RAX: 0000000000000004 [ 744.441889] RAX: ffffffffffffffda RBX: 00007ffc36bc9300 RCX: 00007f0db0ec2775 [ 744.441889] RDX: 00007ffc36bc9400 RSI: 00007ffc36bc9400 RDI: 00007f0dad26f050 [ 744.441889] RBP: 0000000000c0bc60 R08: 0000000000000000 R09: 0000000000000001 [ 744.441889] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc36bc9400 [ 744.441889] R13: 0000000000000001 R14: 00000000ffffff9c R15: 0000000000c0bc60 Cc: Al Viro Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index d537dcdb5d65..6a2185eb66c5 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -212,7 +212,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode, } inode->i_private = device; - d_add(dentry, inode); + d_instantiate(dentry, inode); fsnotify_create(root->d_inode, dentry); inode_unlock(d_inode(root)); From 52768f324241b2d9624d32787cff63ec3e0e420a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 29 Dec 2018 01:05:40 +0100 Subject: [PATCH 26/26] i3c: master: Fix an error checking typo in 'cdns_i3c_master_probe()' Fix a cut'n'paste typo. Checking 'master->sysclk' is expected here. Fixes: 603f2bee2c54 ("i3c: master: Add driver for Cadence IP") Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/i3c-master-cdns.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index bbd79b8b1a80..8889a4fdb454 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -1556,8 +1556,8 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) return PTR_ERR(master->pclk); master->sysclk = devm_clk_get(&pdev->dev, "sysclk"); - if (IS_ERR(master->pclk)) - return PTR_ERR(master->pclk); + if (IS_ERR(master->sysclk)) + return PTR_ERR(master->sysclk); irq = platform_get_irq(pdev, 0); if (irq < 0)