From 59f30abe94bff50636c8cad45207a01fdcb2ee49 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 24 Apr 2015 15:24:27 -0400 Subject: [PATCH 01/31] show isolated cpus in sysfs After system bootup, there is no totally reliable way to see which CPUs are isolated, because the kernel may modify the CPUs specified on the isolcpus= kernel command line option. Export the CPU list that actually got isolated in sysfs, specifically in the file /sys/devices/system/cpu/isolated This can be used by system management tools like libvirt, openstack, and others to ensure proper placement of tasks. Suggested-by: Li Zefan Signed-off-by: Rik van Riel Acked-by: Mike Galbraith Acked-by: Chris Metcalf Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index f160ea44a86d..ea23ee7b545b 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -265,6 +265,17 @@ static ssize_t print_cpus_offline(struct device *dev, } static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); +static ssize_t print_cpus_isolated(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = 0, len = PAGE_SIZE-2; + + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map)); + + return n; +} +static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); + static void cpu_device_release(struct device *dev) { /* @@ -431,6 +442,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, + &dev_attr_isolated.attr, #ifdef CONFIG_GENERIC_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif From 6570a9a1ce3a1dd227a065fd8ad16778d827b753 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 24 Apr 2015 15:24:28 -0400 Subject: [PATCH 02/31] show nohz_full cpus in sysfs Currently there is no way to query which CPUs are in nohz_full mode from userspace. Export the CPU list running in nohz_full mode in sysfs, specifically in the file /sys/devices/system/cpu/nohz_full This can be used by system management tools like libvirt, openstack, and others to ensure proper task placement. Signed-off-by: Rik van Riel Acked-by: Mike Galbraith Acked-by: Chris Metcalf Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index ea23ee7b545b..78720e706176 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "base.h" @@ -276,6 +277,19 @@ static ssize_t print_cpus_isolated(struct device *dev, } static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); +#ifdef CONFIG_NO_HZ_FULL +static ssize_t print_cpus_nohz_full(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = 0, len = PAGE_SIZE-2; + + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); + + return n; +} +static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); +#endif + static void cpu_device_release(struct device *dev) { /* @@ -443,6 +457,9 @@ static struct attribute *cpu_root_attrs[] = { &dev_attr_kernel_max.attr, &dev_attr_offline.attr, &dev_attr_isolated.attr, +#ifdef CONFIG_NO_HZ_FULL + &dev_attr_nohz_full.attr, +#endif #ifdef CONFIG_GENERIC_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif From ecc8617053e0a97272ef2eee138809f30080e84b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:03 -0700 Subject: [PATCH 03/31] module: add extra argument for parse_params() callback This adds an extra argument onto parse_params() to be used as a way to make the unused callback a bit more useful and generic by allowing the caller to pass on a data structure of its choice. An example use case is to allow us to easily make module parameters for every module which we will do next. @ parse @ identifier name, args, params, num, level_min, level_max; identifier unknown, param, val, doing; type s16; @@ extern char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, const char *doing + , void *arg )); @ parse_mod @ identifier name, args, params, num, level_min, level_max; identifier unknown, param, val, doing; type s16; @@ char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, const char *doing + , void *arg )) { ... } @ parse_args_found @ expression R, E1, E2, E3, E4, E5, E6; identifier func; @@ ( R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, func); | R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, &func); | R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, NULL); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, func); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, &func); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, NULL); ) @ parse_args_unused depends on parse_args_found @ identifier parse_args_found.func; @@ int func(char *param, char *val, const char *unused + , void *arg ) { ... } @ mod_unused depends on parse_args_found @ identifier parse_args_found.func; expression A1, A2, A3; @@ - func(A1, A2, A3); + func(A1, A2, A3, NULL); Generated-by: Coccinelle SmPL Cc: cocci@systeme.lip6.fr Cc: Tejun Heo Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Rusty Russell Cc: Christoph Hellwig Cc: Felipe Contreras Cc: Ewan Milne Cc: Jean Delvare Cc: Hannes Reinecke Cc: Jani Nikula Cc: linux-kernel@vger.kernel.org Reviewed-by: Tejun Heo Acked-by: Rusty Russell Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hugetlbpage.c | 4 ++-- include/linux/moduleparam.h | 3 ++- init/main.c | 25 +++++++++++++++---------- kernel/module.c | 6 ++++-- kernel/params.c | 11 +++++++---- lib/dynamic_debug.c | 4 ++-- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ce968b00b7c..d230158c9770 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -336,7 +336,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) unsigned long gpage_npages[MMU_PAGE_COUNT]; static int __init do_gpage_early_setup(char *param, char *val, - const char *unused) + const char *unused, void *arg) { static phys_addr_t size; unsigned long npages; @@ -385,7 +385,7 @@ void __init reserve_hugetlb_gpages(void) strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0, - &do_gpage_early_setup); + NULL, &do_gpage_early_setup); /* * Walk gpage list in reverse, allocating larger page sizes first. diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1c9effa25e26..13923709d30d 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -357,8 +357,9 @@ extern char *parse_args(const char *name, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, - const char *doing)); + const char *doing, void *arg)); /* Called by module remove. */ #ifdef CONFIG_SYSFS diff --git a/init/main.c b/init/main.c index 2115055faeac..edcb13440646 100644 --- a/init/main.c +++ b/init/main.c @@ -235,7 +235,8 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); /* Change NUL term back to "=", to make "param" the whole string. */ -static int __init repair_env_string(char *param, char *val, const char *unused) +static int __init repair_env_string(char *param, char *val, + const char *unused, void *arg) { if (val) { /* param=val or param="val"? */ @@ -252,14 +253,15 @@ static int __init repair_env_string(char *param, char *val, const char *unused) } /* Anything after -- gets handed straight to init. */ -static int __init set_init_arg(char *param, char *val, const char *unused) +static int __init set_init_arg(char *param, char *val, + const char *unused, void *arg) { unsigned int i; if (panic_later) return 0; - repair_env_string(param, val, unused); + repair_env_string(param, val, unused, NULL); for (i = 0; argv_init[i]; i++) { if (i == MAX_INIT_ARGS) { @@ -276,9 +278,10 @@ static int __init set_init_arg(char *param, char *val, const char *unused) * Unknown boot options get handed to init, unless they look like * unused parameters (modprobe will find them in /proc/cmdline). */ -static int __init unknown_bootoption(char *param, char *val, const char *unused) +static int __init unknown_bootoption(char *param, char *val, + const char *unused, void *arg) { - repair_env_string(param, val, unused); + repair_env_string(param, val, unused, NULL); /* Handle obsolete-style parameters */ if (obsolete_checksetup(param)) @@ -410,7 +413,8 @@ static noinline void __init_refok rest_init(void) } /* Check for early params. */ -static int __init do_early_param(char *param, char *val, const char *unused) +static int __init do_early_param(char *param, char *val, + const char *unused, void *arg) { const struct obs_kernel_param *p; @@ -429,7 +433,8 @@ static int __init do_early_param(char *param, char *val, const char *unused) void __init parse_early_options(char *cmdline) { - parse_args("early options", cmdline, NULL, 0, 0, 0, do_early_param); + parse_args("early options", cmdline, NULL, 0, 0, 0, NULL, + do_early_param); } /* Arch code calls this early on, or if not, just before other parsing. */ @@ -535,10 +540,10 @@ asmlinkage __visible void __init start_kernel(void) after_dashes = parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, - -1, -1, &unknown_bootoption); + -1, -1, NULL, &unknown_bootoption); if (!IS_ERR_OR_NULL(after_dashes)) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, - set_init_arg); + NULL, set_init_arg); jump_label_init(); @@ -847,7 +852,7 @@ static void __init do_initcall_level(int level) initcall_command_line, __start___param, __stop___param - __start___param, level, level, - &repair_env_string); + NULL, &repair_env_string); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) do_one_initcall(*fn); diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..24d1f31d02f2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3237,7 +3237,8 @@ out: return err; } -static int unknown_module_param_cb(char *param, char *val, const char *modname) +static int unknown_module_param_cb(char *param, char *val, const char *modname, + void *arg) { /* Check for magic 'dyndbg' arg */ int ret = ddebug_dyndbg_module_param_cb(param, val, modname); @@ -3342,7 +3343,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Module is ready to execute: parsing args may do that. */ after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, unknown_module_param_cb); + -32768, 32767, NULL, + unknown_module_param_cb); if (IS_ERR(after_dashes)) { err = PTR_ERR(after_dashes); goto bug_cleanup; diff --git a/kernel/params.c b/kernel/params.c index a22d6a759b1a..30288c1e15dd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -100,8 +100,9 @@ static int parse_one(char *param, unsigned num_params, s16 min_level, s16 max_level, + void *arg, int (*handle_unknown)(char *param, char *val, - const char *doing)) + const char *doing, void *arg)) { unsigned int i; int err; @@ -128,7 +129,7 @@ static int parse_one(char *param, if (handle_unknown) { pr_debug("doing %s: %s='%s'\n", doing, param, val); - return handle_unknown(param, val, doing); + return handle_unknown(param, val, doing, arg); } pr_debug("Unknown argument '%s'\n", param); @@ -194,7 +195,9 @@ char *parse_args(const char *doing, unsigned num, s16 min_level, s16 max_level, - int (*unknown)(char *param, char *val, const char *doing)) + void *arg, + int (*unknown)(char *param, char *val, + const char *doing, void *arg)) { char *param, *val; @@ -214,7 +217,7 @@ char *parse_args(const char *doing, return args; irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, - min_level, max_level, unknown); + min_level, max_level, arg, unknown); if (irq_was_disabled && !irqs_disabled()) pr_warn("%s: option '%s' enabled irq's!\n", doing, param); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index d8f3d3150603..e491e02eff54 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -887,7 +887,7 @@ static int ddebug_dyndbg_param_cb(char *param, char *val, /* handle both dyndbg and $module.dyndbg params at boot */ static int ddebug_dyndbg_boot_param_cb(char *param, char *val, - const char *unused) + const char *unused, void *arg) { vpr_info("%s=\"%s\"\n", param, val); return ddebug_dyndbg_param_cb(param, val, NULL, 0); @@ -1028,7 +1028,7 @@ static int __init dynamic_debug_init(void) */ cmdline = kstrdup(saved_command_line, GFP_KERNEL); parse_args("dyndbg params", cmdline, NULL, - 0, 0, 0, &ddebug_dyndbg_boot_param_cb); + 0, 0, 0, NULL, &ddebug_dyndbg_boot_param_cb); kfree(cmdline); return 0; From 765230b5f084863183aa8adb3405ab3f32c0b16e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 30 Mar 2015 16:20:04 -0700 Subject: [PATCH 04/31] driver-core: add asynchronous probing support for drivers Some devices take a long time when initializing, and not all drivers are suited to initialize their devices when they are open. For example, input drivers need to interrogate their devices in order to publish device's capabilities before userspace will open them. When such drivers are compiled into kernel they may stall entire kernel initialization. This change allows drivers request for their probe functions to be called asynchronously during driver and device registration (manual binding is still synchronous). Because async_schedule is used to perform asynchronous calls module loading will still wait for the probing to complete. Note that the end goal is to make the probing asynchronous by default, so annotating drivers with PROBE_PREFER_ASYNCHRONOUS is a temporary measure that allows us to speed up boot process while we validating and fixing the rest of the drivers and preparing userspace. This change is based on earlier patch by "Luis R. Rodriguez" Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 1 + drivers/base/bus.c | 31 +++++--- drivers/base/dd.c | 159 ++++++++++++++++++++++++++++++++++------- include/linux/device.h | 28 ++++++++ 4 files changed, 187 insertions(+), 32 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 251c5d30f963..fd3347d9f153 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -116,6 +116,7 @@ static inline int driver_match_device(struct device_driver *drv, { return drv->bus->match ? drv->bus->match(dev, drv) : 1; } +extern bool driver_allows_async_probing(struct device_driver *drv); extern int driver_add_groups(struct device_driver *drv, const struct attribute_group **groups); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 79bc203f51ef..500592486e88 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -549,15 +550,12 @@ void bus_probe_device(struct device *dev) { struct bus_type *bus = dev->bus; struct subsys_interface *sif; - int ret; if (!bus) return; - if (bus->p->drivers_autoprobe) { - ret = device_attach(dev); - WARN_ON(ret < 0); - } + if (bus->p->drivers_autoprobe) + device_initial_probe(dev); mutex_lock(&bus->p->mutex); list_for_each_entry(sif, &bus->p->interfaces, node) @@ -659,6 +657,17 @@ static ssize_t uevent_store(struct device_driver *drv, const char *buf, } static DRIVER_ATTR_WO(uevent); +static void driver_attach_async(void *_drv, async_cookie_t cookie) +{ + struct device_driver *drv = _drv; + int ret; + + ret = driver_attach(drv); + + pr_debug("bus: '%s': driver %s async attach completed: %d\n", + drv->bus->name, drv->name, ret); +} + /** * bus_add_driver - Add a driver to the bus. * @drv: driver. @@ -691,9 +700,15 @@ int bus_add_driver(struct device_driver *drv) klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers); if (drv->bus->p->drivers_autoprobe) { - error = driver_attach(drv); - if (error) - goto out_unregister; + if (driver_allows_async_probing(drv)) { + pr_debug("bus: '%s': probing driver %s asynchronously\n", + drv->bus->name, drv->name); + async_schedule(driver_attach_async, drv); + } else { + error = driver_attach(drv); + if (error) + goto out_unregister; + } } module_add_driver(drv->owner, drv); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index e843fdbe4925..2ad33b21888c 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -417,16 +417,140 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) return ret; } -static int __device_attach(struct device_driver *drv, void *data) +bool driver_allows_async_probing(struct device_driver *drv) { - struct device *dev = data; + return drv->probe_type == PROBE_PREFER_ASYNCHRONOUS; +} + +struct device_attach_data { + struct device *dev; + + /* + * Indicates whether we are are considering asynchronous probing or + * not. Only initial binding after device or driver registration + * (including deferral processing) may be done asynchronously, the + * rest is always synchronous, as we expect it is being done by + * request from userspace. + */ + bool check_async; + + /* + * Indicates if we are binding synchronous or asynchronous drivers. + * When asynchronous probing is enabled we'll execute 2 passes + * over drivers: first pass doing synchronous probing and second + * doing asynchronous probing (if synchronous did not succeed - + * most likely because there was no driver requiring synchronous + * probing - and we found asynchronous driver during first pass). + * The 2 passes are done because we can't shoot asynchronous + * probe for given device and driver from bus_for_each_drv() since + * driver pointer is not guaranteed to stay valid once + * bus_for_each_drv() iterates to the next driver on the bus. + */ + bool want_async; + + /* + * We'll set have_async to 'true' if, while scanning for matching + * driver, we'll encounter one that requests asynchronous probing. + */ + bool have_async; +}; + +static int __device_attach_driver(struct device_driver *drv, void *_data) +{ + struct device_attach_data *data = _data; + struct device *dev = data->dev; + bool async_allowed; + + /* + * Check if device has already been claimed. This may + * happen with driver loading, device discovery/registration, + * and deferred probe processing happens all at once with + * multiple threads. + */ + if (dev->driver) + return -EBUSY; if (!driver_match_device(drv, dev)) return 0; + async_allowed = driver_allows_async_probing(drv); + + if (async_allowed) + data->have_async = true; + + if (data->check_async && async_allowed != data->want_async) + return 0; + return driver_probe_device(drv, dev); } +static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) +{ + struct device *dev = _dev; + struct device_attach_data data = { + .dev = dev, + .check_async = true, + .want_async = true, + }; + + device_lock(dev); + + bus_for_each_drv(dev->bus, NULL, &data, __device_attach_driver); + dev_dbg(dev, "async probe completed\n"); + + pm_request_idle(dev); + + device_unlock(dev); + + put_device(dev); +} + +int __device_attach(struct device *dev, bool allow_async) +{ + int ret = 0; + + device_lock(dev); + if (dev->driver) { + if (klist_node_attached(&dev->p->knode_driver)) { + ret = 1; + goto out_unlock; + } + ret = device_bind_driver(dev); + if (ret == 0) + ret = 1; + else { + dev->driver = NULL; + ret = 0; + } + } else { + struct device_attach_data data = { + .dev = dev, + .check_async = allow_async, + .want_async = false, + }; + + ret = bus_for_each_drv(dev->bus, NULL, &data, + __device_attach_driver); + if (!ret && allow_async && data.have_async) { + /* + * If we could not find appropriate driver + * synchronously and we are allowed to do + * async probes and there are drivers that + * want to probe asynchronously, we'll + * try them. + */ + dev_dbg(dev, "scheduling asynchronous probe\n"); + get_device(dev); + async_schedule(__device_attach_async_helper, dev); + } else { + pm_request_idle(dev); + } + } +out_unlock: + device_unlock(dev); + return ret; +} + /** * device_attach - try to attach device to a driver. * @dev: device. @@ -443,31 +567,15 @@ static int __device_attach(struct device_driver *drv, void *data) */ int device_attach(struct device *dev) { - int ret = 0; - - device_lock(dev); - if (dev->driver) { - if (klist_node_attached(&dev->p->knode_driver)) { - ret = 1; - goto out_unlock; - } - ret = device_bind_driver(dev); - if (ret == 0) - ret = 1; - else { - dev->driver = NULL; - ret = 0; - } - } else { - ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); - pm_request_idle(dev); - } -out_unlock: - device_unlock(dev); - return ret; + return __device_attach(dev, false); } EXPORT_SYMBOL_GPL(device_attach); +void device_initial_probe(struct device *dev) +{ + __device_attach(dev, true); +} + static int __driver_attach(struct device *dev, void *data) { struct device_driver *drv = data; @@ -522,6 +630,9 @@ static void __device_release_driver(struct device *dev) drv = dev->driver; if (drv) { + if (driver_allows_async_probing(drv)) + async_synchronize_full(); + pm_runtime_get_sync(dev); driver_sysfs_remove(dev); diff --git a/include/linux/device.h b/include/linux/device.h index 6558af90c8fe..7857b46c548b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -195,6 +195,31 @@ extern int bus_unregister_notifier(struct bus_type *bus, extern struct kset *bus_get_kset(struct bus_type *bus); extern struct klist *bus_get_device_klist(struct bus_type *bus); +/** + * enum probe_type - device driver probe type to try + * Device drivers may opt in for special handling of their + * respective probe routines. This tells the core what to + * expect and prefer. + * + * @PROBE_SYNCHRONOUS: Default. Drivers expect their probe routines + * to run synchronously with driver and device registration + * (with the exception of -EPROBE_DEFER handling - re-probing + * always ends up being done asynchronously). + * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which + * probing order is not essential for booting the system may + * opt into executing their probes asynchronously. + * + * Note that the end goal is to switch the kernel to use asynchronous + * probing by default, so annotating drivers with + * %PROBE_PREFER_ASYNCHRONOUS is a temporary measure that allows us + * to speed up boot process while we are validating the rest of the + * drivers. + */ +enum probe_type { + PROBE_SYNCHRONOUS, + PROBE_PREFER_ASYNCHRONOUS, +}; + /** * struct device_driver - The basic device driver structure * @name: Name of the device driver. @@ -202,6 +227,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * @owner: The module owner. * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. + * @probe_type: Type of the probe (synchronous or asynchronous) to use. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -235,6 +261,7 @@ struct device_driver { const char *mod_name; /* used for built-in modules */ bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ + enum probe_type probe_type; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; @@ -975,6 +1002,7 @@ extern int __must_check device_bind_driver(struct device *dev); extern void device_release_driver(struct device *dev); extern int __must_check device_attach(struct device *dev); extern int __must_check driver_attach(struct device_driver *drv); +extern void device_initial_probe(struct device *dev); extern int __must_check device_reprobe(struct device *dev); /* From f2411da746985e60d4d087f3a43e271c61785927 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:05 -0700 Subject: [PATCH 05/31] driver-core: add driver module asynchronous probe support Some init systems may wish to express the desire to have device drivers run their probe() code asynchronously. This implements support for this and allows userspace to request async probe as a preference through a generic shared device driver module parameter, async_probe. Implementation for async probe is supported through a module parameter given that since synchronous probe has been prevalent for years some userspace might exist which relies on the fact that the device driver will probe synchronously and the assumption that devices it provides will be immediately available after this. Signed-off-by: Luis R. Rodriguez Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 3 +++ drivers/base/dd.c | 8 +++++++- include/linux/device.h | 8 +++++--- include/linux/module.h | 2 ++ kernel/module.c | 12 ++++++++++-- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61ab1628a057..e89fdd5aa605 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -943,6 +943,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. auto selects the default scheme, which automatically enables eagerfpu restore for xsaveopt. + module.async_probe [KNL] + Enable asynchronous probe on this module. + early_ioremap_debug [KNL] Enable debug messages in early_ioremap support. This is useful for tracking down temporary early mappings diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 2ad33b21888c..7a2fa5dcead7 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -419,7 +419,13 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) bool driver_allows_async_probing(struct device_driver *drv) { - return drv->probe_type == PROBE_PREFER_ASYNCHRONOUS; + if (drv->probe_type == PROBE_PREFER_ASYNCHRONOUS) + return true; + + if (drv->owner && drv->owner->async_probe_requested) + return true; + + return false; } struct device_attach_data { diff --git a/include/linux/device.h b/include/linux/device.h index 7857b46c548b..77b7cd9e5467 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -201,10 +201,12 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * respective probe routines. This tells the core what to * expect and prefer. * - * @PROBE_SYNCHRONOUS: Default. Drivers expect their probe routines + * @PROBE_DEFAULT_STRATEGY: Drivers expect their probe routines * to run synchronously with driver and device registration * (with the exception of -EPROBE_DEFER handling - re-probing - * always ends up being done asynchronously). + * always ends up being done asynchronously) unless user + * explicitly requested asynchronous probing via module + * parameter. * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which * probing order is not essential for booting the system may * opt into executing their probes asynchronously. @@ -216,7 +218,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * drivers. */ enum probe_type { - PROBE_SYNCHRONOUS, + PROBE_DEFAULT_STRATEGY, PROBE_PREFER_ASYNCHRONOUS, }; diff --git a/include/linux/module.h b/include/linux/module.h index c883b86ea964..f46a47d3c0dc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -257,6 +257,8 @@ struct module { bool sig_ok; #endif + bool async_probe_requested; + /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; const unsigned long *gpl_future_crcs; diff --git a/kernel/module.c b/kernel/module.c index 24d1f31d02f2..ea941bc327d5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3107,7 +3107,7 @@ static noinline int do_init_module(struct module *mod) * * http://thread.gmane.org/gmane.linux.kernel/1420814 */ - if (current->flags & PF_USED_ASYNC) + if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) async_synchronize_full(); mutex_lock(&module_mutex); @@ -3240,8 +3240,16 @@ out: static int unknown_module_param_cb(char *param, char *val, const char *modname, void *arg) { + struct module *mod = arg; + int ret; + + if (strcmp(param, "async_probe") == 0) { + mod->async_probe_requested = true; + return 0; + } + /* Check for magic 'dyndbg' arg */ - int ret = ddebug_dyndbg_module_param_cb(param, val, modname); + ret = ddebug_dyndbg_module_param_cb(param, val, modname); if (ret != 0) pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); return 0; From d173a137c5bd95ee29d02705e5fa8890ef149718 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:06 -0700 Subject: [PATCH 06/31] driver-core: enable drivers to opt-out of async probe There are drivers that can not be probed asynchronously. One such group is platform drivers registered with platform_driver_probe(), which expects driver's probe routine be discarded after the driver has been registered and initial binding attempt executed. Also platform_driver_probe() an error when no devices were bound to the driver, allowing failing to load such driver module altogether. Other drivers do not work well with asynchronous probing because of driver bug or not optimal driver organization. To allow using such drivers even when user requests asynchronous probing as default boot strategy, let's allow them to opt out. Signed-off-by: Luis R. Rodriguez Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 14 ++++++++++---- include/linux/device.h | 13 +++++++------ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 7a2fa5dcead7..39292535c74e 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -419,13 +419,19 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) bool driver_allows_async_probing(struct device_driver *drv) { - if (drv->probe_type == PROBE_PREFER_ASYNCHRONOUS) + switch (drv->probe_type) { + case PROBE_PREFER_ASYNCHRONOUS: return true; - if (drv->owner && drv->owner->async_probe_requested) - return true; + case PROBE_FORCE_SYNCHRONOUS: + return false; - return false; + default: + if (drv->owner && drv->owner->async_probe_requested) + return true; + + return false; + } } struct device_attach_data { diff --git a/include/linux/device.h b/include/linux/device.h index 77b7cd9e5467..00ac57c26615 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -201,15 +201,15 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * respective probe routines. This tells the core what to * expect and prefer. * - * @PROBE_DEFAULT_STRATEGY: Drivers expect their probe routines - * to run synchronously with driver and device registration - * (with the exception of -EPROBE_DEFER handling - re-probing - * always ends up being done asynchronously) unless user - * explicitly requested asynchronous probing via module - * parameter. + * @PROBE_DEFAULT_STRATEGY: Used by drivers that work equally well + * whether probed synchronously or asynchronously. * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which * probing order is not essential for booting the system may * opt into executing their probes asynchronously. + * @PROBE_FORCE_SYNCHRONOUS: Use this to annotate drivers that need + * their probe routines to run synchronously with driver and + * device registration (with the exception of -EPROBE_DEFER + * handling - re-probing always ends up being done asynchronously). * * Note that the end goal is to switch the kernel to use asynchronous * probing by default, so annotating drivers with @@ -220,6 +220,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); enum probe_type { PROBE_DEFAULT_STRATEGY, PROBE_PREFER_ASYNCHRONOUS, + PROBE_FORCE_SYNCHRONOUS, }; /** From 5c36eb2a9ecc0bc5228451b1eeb83e70b6bb7473 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 30 Mar 2015 16:20:07 -0700 Subject: [PATCH 07/31] driver-core: platform_driver_probe() must probe synchronously Because platform_driver_probe() checks, after trying to register driver, if there are any devices that driver successfully bound to, driver's probe routine must be run synchronously. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ebf034b97278..063f0ab15259 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -613,6 +613,19 @@ int __init_or_module __platform_driver_probe(struct platform_driver *drv, { int retval, code; + if (drv->driver.probe_type == PROBE_PREFER_ASYNCHRONOUS) { + pr_err("%s: drivers registered with %s can not be probed asynchronously\n", + drv->driver.name, __func__); + return -EINVAL; + } + + /* + * We have to run our probes synchronously because we check if + * we find any devices to bind to and exit with error if there + * are any. + */ + drv->driver.probe_type = PROBE_FORCE_SYNCHRONOUS; + /* * Prevent driver from requesting probe deferral to avoid further * futile probe attempts. From 735c0f8f12402774eff2320657cbb1e7d945164a Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:08 -0700 Subject: [PATCH 08/31] amd64_edac: enforce synchronous probe While testing asynchronous PCI probe on this driver I noticed it failed because the driver checks if any of the PCI devices have been bound to the driver after registering it, which obviously does not work if probing is asynchronous. While there are patches and discussions on how the driver should behave are ongoing, let's enforce synchronous probe for this driver for now. Reviewed-by: Tejun Heo Signed-off-by: Luis R. Rodriguez Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/amd64_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 92772fffc52f..73aea40a9c89 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2964,6 +2964,7 @@ static struct pci_driver amd64_pci_driver = { .probe = probe_one_instance, .remove = remove_one_instance, .id_table = amd64_pci_table, + .driver.probe_type = PROBE_FORCE_SYNCHRONOUS, }; static void setup_pci_device(void) From ec0ccc16a09fc32f7142ef3ddf1c2276fbbb35d0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 30 Mar 2015 16:20:09 -0700 Subject: [PATCH 09/31] module: add core_param_unsafe Similarly to module_param_unsafe(), add the helper to be used by core code wishing to expose unsafe debugging or testing parameters that taint the kernel when set. Acked-by: Rusty Russell Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/moduleparam.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 13923709d30d..6480dcaca275 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -310,6 +310,15 @@ static inline void __kernel_param_unlock(void) #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ __module_param_call("", name, ¶m_ops_##type, &var, perm, -1, 0) + +/** + * core_param_unsafe - same as core_param but taints kernel + */ +#define core_param_unsafe(name, var, type, perm) \ + param_check_##type(name, &(var)); \ + __module_param_call("", name, ¶m_ops_##type, &var, perm, \ + -1, KERNEL_PARAM_FL_UNSAFE) + #endif /* !MODULE */ /** From ba50150e80088e159ac80f0e71243ae2fa0c4901 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 23 Apr 2015 17:14:31 +0200 Subject: [PATCH 10/31] kernfs: remove outdated and confusing comment Grabbing the parent is not happening anymore since 2010 (e72ceb8ccac5f7 "sysfs: Remove sysfs_get/put_active_two"). Remove this confusing comment. Signed-off-by: Wolfram Sang Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 2bacb9988566..7247252ee9b1 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -785,7 +785,6 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) struct kernfs_node *kn = filp->f_path.dentry->d_fsdata; struct kernfs_open_node *on = kn->attr.open; - /* need parent for the kobj, grab both */ if (!kernfs_get_active(kn)) goto trigger; From dc7dfcd838985947ee1a66cdaa41c4968404e0ed Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 23 Apr 2015 17:20:54 +0200 Subject: [PATCH 11/31] MAINTAINERS: add kernfs entry My kernfs patch slipped through because I didn't know which maintainer to CC. Have been told it's gkh. Add an entry, and sort the file patterns while we are here. Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f8e0afb708b4..1eb59c8c754c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3333,16 +3333,17 @@ F: drivers/block/drbd/ F: lib/lru_cache.c F: Documentation/blockdev/drbd/ -DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS +DRIVER CORE, KOBJECTS, DEBUGFS, KERNFS AND SYSFS M: Greg Kroah-Hartman T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git S: Supported F: Documentation/kobject.txt F: drivers/base/ -F: fs/sysfs/ F: fs/debugfs/ -F: include/linux/kobj* +F: fs/kernfs/ +F: fs/sysfs/ F: include/linux/debugfs.h +F: include/linux/kobj* F: lib/kobj* DRM DRIVERS From 802a87fd5be9cac1d05879bcdae2620e46b0dbe6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 20 May 2015 16:36:31 -0700 Subject: [PATCH 12/31] driver-core: make __device_attach() static It is only used within dd.c and thus need not be global. Reported-by: kbuild test robot Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 39292535c74e..42e97d90a59a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -517,7 +517,7 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) put_device(dev); } -int __device_attach(struct device *dev, bool allow_async) +static int __device_attach(struct device *dev, bool allow_async) { int ret = 0; From 80c6e1465948c2e91214f01764f427d31ebedb26 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 21 May 2015 15:49:37 -0700 Subject: [PATCH 13/31] driver-core: fix build for !CONFIG_MODULES Commit f2411da74698 ("driver-core: add driver module asynchronous probe support") broke build in case modules are disabled, because in this case "struct module" is not defined and we can't dereference it. Let's define module_requested_async_probing() helper and stub it out if modules are disabled. Reported-by: kbuild test robot Reported-by: Stephen Rothwell Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- include/linux/module.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 42e97d90a59a..8da8e071f01d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -427,7 +427,7 @@ bool driver_allows_async_probing(struct device_driver *drv) return false; default: - if (drv->owner && drv->owner->async_probe_requested) + if (module_requested_async_probing(drv->owner)) return true; return false; diff --git a/include/linux/module.h b/include/linux/module.h index f46a47d3c0dc..57f5c0a804c0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -510,6 +510,11 @@ int unregister_module_notifier(struct notifier_block *nb); extern void print_modules(void); +static inline bool module_requested_async_probing(struct module *module) +{ + return module && module->async_probe_requested; +} + #else /* !CONFIG_MODULES... */ /* Given an address, look for it in the exception tables. */ @@ -620,6 +625,12 @@ static inline int unregister_module_notifier(struct notifier_block *nb) static inline void print_modules(void) { } + +static inline bool module_requested_async_probing(struct module *module) +{ + return false; +} + #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS From ed1dc8a89454218a2471f67284765e8c03bdfc6b Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 29 Apr 2015 10:55:46 +0200 Subject: [PATCH 14/31] sysfs: disambiguate between "error code" and "failure" in comments The sentence "Returns 0 on success or error" might be misinterpreted as "the function will always returns 0", make it less ambiguous. Also, use the word "failure" as the contrary of "success". Signed-off-by: Antonio Ospite Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/group.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index b400c04371f0..39a019936768 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -135,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update, * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * - * Returns 0 on success or error. + * Returns 0 on success or error code on failure. */ int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) @@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_group); * It will explicitly warn and error if any of the attribute files being * created already exist. * - * Returns 0 on success or error code from sysfs_create_group on error. + * Returns 0 on success or error code from sysfs_create_group on failure. */ int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) @@ -193,7 +193,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_groups); * The primary use for this function is to call it after making a change * that affects group visibility. * - * Returns 0 on success or error. + * Returns 0 on success or error code on failure. */ int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp) From 2539b258ec028351af954c169ea1b0ff72023a9f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 May 2015 14:45:34 +0100 Subject: [PATCH 15/31] drivers/base: cacheinfo: fix annoying typo when DT nodes are absent s/hierarcy/hierarchy/ Maybe the typo will annoy people enough so that they add the missing nodes to their device-tree files, but I still think this is better off fixed. Signed-off-by: Will Deacon Acked-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 2 +- drivers/base/cacheinfo.c | 4 ++-- include/linux/cacheinfo.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 99983e67c13c..0908fb43566d 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -243,7 +243,7 @@ Description: Parameters for the CPU cache attributes coherency_line_size: the minimum amount of data in bytes that gets transferred from memory to cache - level: the cache hierarcy in the multi-level cache configuration + level: the cache hierarchy in the multi-level cache configuration number_of_sets: total number of sets in the cache, a set is a collection of cache lines with the same cache index diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 9c2ba1c97c42..c75bda89821b 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -191,12 +191,12 @@ static int detect_cache_attributes(unsigned int cpu) if (ret) goto free_ci; /* - * For systems using DT for cache hierarcy, of_node and shared_cpu_map + * For systems using DT for cache hierarchy, of_node and shared_cpu_map * will be set up here only if they are not populated already */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { - pr_warn("Unable to detect cache hierarcy from DT for CPU %d\n", + pr_warn("Unable to detect cache hierarchy from DT for CPU %d\n", cpu); goto free_ci; } diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 3daf5ed392c9..2189935075b4 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -19,7 +19,7 @@ enum cache_type { /** * struct cacheinfo - represent a cache leaf node * @type: type of the cache - data, inst or unified - * @level: represents the hierarcy in the multi-level cache + * @level: represents the hierarchy in the multi-level cache * @coherency_line_size: size of each cache line usually representing * the minimum amount of data that gets transferred from memory * @number_of_sets: total number of sets, a set is a collection of cache From f4445f8b204de44a8baa4326b0e56537be867427 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 14 May 2015 15:28:24 +0100 Subject: [PATCH 16/31] drivers: of/base: move of_init to driver_init Commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") adds the symlink `of_node` for each device pointing to it's device tree node while creating/initialising it. However the devicetree sysfs is created and setup in of_init which is executed at core_initcall level. For all the devices created before of_init, the following error is thrown: "Error -2(-ENOENT) creating of_node link" Like many other components in driver model, initialize the sysfs support for OF/devicetree from driver_init so that it's ready before any devices are created. Fixes: 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") Suggested-by: Rob Herring Cc: Grant Likely Cc: Pawel Moll Cc: Benjamin Herrenschmidt Signed-off-by: Sudeep Holla Tested-by: Robert Schwebel Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/init.c | 2 ++ drivers/of/base.c | 8 +++----- include/linux/of.h | 6 ++++++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/base/init.c b/drivers/base/init.c index da033d3bab3c..48c0e220acc0 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "base.h" @@ -34,4 +35,5 @@ void __init driver_init(void) cpu_dev_init(); memory_dev_init(); container_dev_init(); + of_core_init(); } diff --git a/drivers/of/base.c b/drivers/of/base.c index 99764db0875a..f0650265febf 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -189,7 +189,7 @@ int __of_attach_node_sysfs(struct device_node *np) return 0; } -static int __init of_init(void) +void __init of_core_init(void) { struct device_node *np; @@ -198,7 +198,8 @@ static int __init of_init(void) of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); if (!of_kset) { mutex_unlock(&of_mutex); - return -ENOMEM; + pr_err("devicetree: failed to register existing nodes\n"); + return; } for_each_of_allnodes(np) __of_attach_node_sysfs(np); @@ -207,10 +208,7 @@ static int __init of_init(void) /* Symlink in /proc as required by userspace ABI */ if (of_root) proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base"); - - return 0; } -core_initcall(of_init); static struct property *__of_find_property(const struct device_node *np, const char *name, int *lenp) diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..b871ff9d81d7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -121,6 +121,8 @@ extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; #ifdef CONFIG_OF +void of_core_init(void); + static inline bool is_of_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_OF; @@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ +static inline void of_core_init(void) +{ +} + static inline bool is_of_node(struct fwnode_handle *fwnode) { return false; From f5727b05d221796baf69667ed5c891d4bd53711e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 12 May 2015 14:49:40 -0700 Subject: [PATCH 17/31] firmware: fix __getname() missing failure check The request_firmware*() APIs uses __getname() to iterate over the list of paths possible for firmware to be found, the code however never checked for failure on __getname(). Although *very unlikely*, this can still happen. Add the missing check. There is still no checks on the concatenation of the path and filename passed, that requires a bit more work and subsequent patches address this. The commit that introduced this is abb139e7 ("firmware: teach the kernel to load firmware files directly from the filesystem"). mcgrof@ergon ~/linux (git::firmware-fixes) $ git describe --contains abb139e7 v3.7-rc1~120 Cc: Linus Torvalds Cc: Ming Lei Cc: Rusty Russell Cc: David Howells Cc: Kyle McMartin Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 171841ad1008..49139a1ee25e 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -322,7 +322,11 @@ static int fw_get_filesystem_firmware(struct device *device, { int i; int rc = -ENOENT; - char *path = __getname(); + char *path; + + path = __getname(); + if (!path) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(fw_path); i++) { struct file *file; From 1ba4de17e0cb9cc3e03ce5b1fafebdd01c48c1f2 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 12 May 2015 14:49:41 -0700 Subject: [PATCH 18/31] firmware: check for file truncation on direct firmware loading When direct firmware loading is used we iterate over a list of possible firmware paths and concatenate the desired firmware name with each path and look for the file there. Should the passed firmware name be too long we end up truncating the file we want to look for, the search however is still done. Add a check for truncation instead of looking for a truncated firmware filename. Cc: Linus Torvalds Cc: Ming Lei Cc: Rusty Russell Cc: David Howells Cc: Kyle McMartin Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 49139a1ee25e..9ffa70762473 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -320,7 +320,7 @@ fail: static int fw_get_filesystem_firmware(struct device *device, struct firmware_buf *buf) { - int i; + int i, len; int rc = -ENOENT; char *path; @@ -335,7 +335,12 @@ static int fw_get_filesystem_firmware(struct device *device, if (!fw_path[i][0]) continue; - snprintf(path, PATH_MAX, "%s/%s", fw_path[i], buf->fw_id); + len = snprintf(path, PATH_MAX, "%s/%s", + fw_path[i], buf->fw_id); + if (len >= PATH_MAX) { + rc = -ENAMETOOLONG; + break; + } file = filp_open(path, O_RDONLY, 0); if (IS_ERR(file)) From f9692b2699bd82ae0df1d7d495d9fb9c4bd45ad9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 12 May 2015 14:49:42 -0700 Subject: [PATCH 19/31] firmware: fix possible use after free on name on asynchronous request Asynchronous firmware loading copies the pointer to the name passed as an argument only to be scheduled later and used. This behaviour works well for synchronous calling but in asynchronous mode there's a chance the caller could immediately free the passed string after making the asynchronous call. This could trigger a use after free having the kernel look on disk for arbitrary file names. In order to force-test the issue you can use a test-driver designed to illustrate this issue on github [0], use the next-20150505-fix-use-after-free branch. With this patch applied you get: [ 283.512445] firmware name: test_module_stuff.bin [ 287.514020] firmware name: test_module_stuff.bin [ 287.532489] firmware found Without this patch applied you can end up with something such as: [ 135.624216] firmware name: \xffffff80BJ [ 135.624249] platform fake-dev.0: Direct firmware load for \xffffff80Bi failed with error -2 [ 135.624252] No firmware found [ 135.624252] firmware found Unfortunatley in the worst and most common case however you can typically crash your system with a page fault by trying to free something which you cannot, and/or a NULL pointer dereference [1]. The fix and issue using schedule_work() for asynchronous runs is generalized in the following SmPL grammar patch, when applied to next-20150505 only the firmware_class code is affected. This grammar patch can and should further be generalized to vet for for other kernel asynchronous mechanisms. @ calls_schedule_work @ type T; T *priv_work; identifier func, work_func; identifier work; identifier priv_name, name; expression gfp; @@ func(..., const char *name, ...) { ... priv_work = kzalloc(sizeof(T), gfp); ... - priv_work->priv_name = name; + priv_work->priv_name = kstrdup_const(name, gfp); ... (... when any if (...) { ... + kfree_const(priv_work->priv_name); kfree(priv_work); ... } ) ... when any INIT_WORK(&priv_work->work, work_func); ... schedule_work(&priv_work->work); ... } @ the_work_func depends on calls_schedule_work @ type calls_schedule_work.T; T *priv_work; identifier calls_schedule_work.work_func; identifier calls_schedule_work.priv_name; identifier calls_schedule_work.work; identifier some_work; @@ work_func(...) { ... priv_work = container_of(some_work, T, work); ... + kfree_const(priv_work->priv_name); kfree(priv_work); ... } [0] https://github.com/mcgrof/fake-firmware-test.git [1] The following kernel ring buffer splat: firmware name: test_module_stuff.bin firmware name: firmware found general protection fault: 0000 [#1] SMP Modules linked in: test(O) <...etc-it-does-not-matter> drm sr_mod cdrom xhci_pci xhci_hcd rtsx_pci mfd_core video button sg CPU: 3 PID: 87 Comm: kworker/3:2 Tainted: G O 4.0.0-00010-g22b5bb0-dirty #176 Hardware name: LENOVO 20AW000LUS/20AW000LUS, BIOS GLET43WW (1.18 ) 12/04/2013 Workqueue: events request_firmware_work_func task: ffff8800c7f8e290 ti: ffff8800c7f94000 task.ti: ffff8800c7f94000 RIP: 0010:[] [] fw_free_buf+0xc/0x40 RSP: 0000:ffff8800c7f97d78 EFLAGS: 00010286 RAX: ffffffff81ae3700 RBX: ffffffff816d1181 RCX: 0000000000000006 RDX: 0001ee850ff68500 RSI: 0000000000000246 RDI: c35d5f415e415d41 RBP: ffff8800c7f97d88 R08: 000000000000000a R09: 0000000000000000 R10: 0000000000000358 R11: ffff8800c7f97a7e R12: ffff8800c7ec1e80 R13: ffff88021e2d4cc0 R14: ffff88021e2dff00 R15: 00000000000000c0 FS: 0000000000000000(0000) GS:ffff88021e2c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000034b8cd8 CR3: 000000021073c000 CR4: 00000000001407e0 Stack: ffffffff816d1181 ffff8800c7ec1e80 ffff8800c7f97da8 ffffffff814a58f8 000000000000000a ffffffff816d1181 ffff8800c7f97dc8 ffffffffa047002c ffff88021e2dff00 ffff8802116ac1c0 ffff8800c7f97df8 ffffffff814a65fe Call Trace: [] ? __schedule+0x361/0x940 [] release_firmware+0x58/0x80 [] ? __schedule+0x361/0x940 [] test_mod_cb+0x2c/0x43 [test] [] request_firmware_work_func+0x5e/0x80 [] ? __schedule+0x361/0x940 [] process_one_work+0x14a/0x3f0 [] worker_thread+0x121/0x460 [] ? rescuer_thread+0x310/0x310 [] kthread+0xc9/0xe0 [] ? kthread_create_on_node+0x180/0x180 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x180/0x180 Code: c7 c6 dd ad a3 81 48 c7 c7 20 97 ce 81 31 c0 e8 0b b2 ed ff e9 78 ff ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 54 53 <4c> 8b 67 38 48 89 fb 4c 89 e7 e8 85 f7 22 00 f0 83 2b 01 74 0f RIP [] fw_free_buf+0xc/0x40 RSP ---[ end trace 4e62c56a58d0eac1 ]--- BUG: unable to handle kernel paging request at ffffffffffffffd8 IP: [] kthread_data+0x10/0x20 PGD 1c13067 PUD 1c15067 PMD 0 Oops: 0000 [#2] SMP Modules linked in: test(O) <...etc-it-does-not-matter> drm sr_mod cdrom xhci_pci xhci_hcd rtsx_pci mfd_core video button sg CPU: 3 PID: 87 Comm: kworker/3:2 Tainted: G D O 4.0.0-00010-g22b5bb0-dirty #176 Hardware name: LENOVO 20AW000LUS/20AW000LUS, BIOS GLET43WW (1.18 ) 12/04/2013 task: ffff8800c7f8e290 ti: ffff8800c7f94000 task.ti: ffff8800c7f94000 RIP: 0010:[] [] kthread_data+0x10/0x20 RSP: 0018:ffff8800c7f97b18 EFLAGS: 00010096 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 000000000000000d RDX: 0000000000000003 RSI: 0000000000000003 RDI: ffff8800c7f8e290 RBP: ffff8800c7f97b18 R08: 000000000000bc00 R09: 0000000000007e76 R10: 0000000000000001 R11: 000000000000002f R12: ffff8800c7f8e290 R13: 00000000000154c0 R14: 0000000000000003 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88021e2c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 0000000210675000 CR4: 00000000001407e0 Stack: ffff8800c7f97b38 ffffffff8108dcd5 ffff8800c7f97b38 ffff88021e2d54c0 ffff8800c7f97b88 ffffffff816d1500 ffff880213d42368 ffff8800c7f8e290 ffff8800c7f97b88 ffff8800c7f97fd8 ffff8800c7f8e710 0000000000000246 Call Trace: [] wq_worker_sleeping+0x15/0xa0 [] __schedule+0x6e0/0x940 [] schedule+0x37/0x90 [] do_exit+0x6bc/0xb40 [] oops_end+0x9f/0xe0 [] die+0x4b/0x70 [] do_general_protection+0xe2/0x170 [] general_protection+0x28/0x30 [] ? __schedule+0x361/0x940 [] ? fw_free_buf+0xc/0x40 [] ? __schedule+0x361/0x940 [] release_firmware+0x58/0x80 [] ? __schedule+0x361/0x940 [] test_mod_cb+0x2c/0x43 [test] [] request_firmware_work_func+0x5e/0x80 [] ? __schedule+0x361/0x940 [] process_one_work+0x14a/0x3f0 [] worker_thread+0x121/0x460 [] ? rescuer_thread+0x310/0x310 [] kthread+0xc9/0xe0 [] ? kthread_create_on_node+0x180/0x180 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x180/0x180 Code: 00 48 89 e5 5d 48 8b 40 c8 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 87 30 05 00 00 55 48 89 e5 <48> 8b 40 d8 5d c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 RIP [] kthread_data+0x10/0x20 RSP CR2: ffffffffffffffd8 ---[ end trace 4e62c56a58d0eac2 ]--- Fixing recursive fault but reboot is needed! Cc: Linus Torvalds Cc: Rusty Russell Cc: David Howells Cc: Ming Lei Cc: Seth Forshee Cc: Kyle McMartin Generated-by: Coccinelle SmPL Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 9ffa70762473..62e46116d1c3 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -1256,6 +1256,7 @@ static void request_firmware_work_func(struct work_struct *work) put_device(fw_work->device); /* taken in request_firmware_nowait() */ module_put(fw_work->module); + kfree_const(fw_work->name); kfree(fw_work); } @@ -1295,7 +1296,9 @@ request_firmware_nowait( return -ENOMEM; fw_work->module = module; - fw_work->name = name; + fw_work->name = kstrdup_const(name, gfp); + if (!fw_work->name) + return -ENOMEM; fw_work->device = device; fw_work->context = context; fw_work->cont = cont; @@ -1303,6 +1306,7 @@ request_firmware_nowait( (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER); if (!try_module_get(module)) { + kfree_const(fw_work->name); kfree(fw_work); return -EFAULT; } From e0fd9b1d9c44c0966e322046912a0a0bce237d42 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 12 May 2015 14:49:43 -0700 Subject: [PATCH 20/31] firmware: use const for remaining firmware names We currently use flexible arrays with a char at the end for the remaining internal firmware name uses. There are two limitations with the way we use this. Since we're using a flexible array for a string on the struct if we wanted to use two strings it means we'd have a disjoint means of handling the strings, one using the flexible array, and another a char * pointer. We're also currently not using 'const' for the string. We wish to later extend some firmware data structures with other string/char pointers, but we also want to be very pedantic about const usage. Since we're going to change things to use 'const' we might as well also address unified way to use multiple strings on the structs. Replace the flexible array practice for strings with kstrdup_const() and kfree_const(), this will avoid allocations when the vmlinux .rodata is used, and just allocate a new proper string for us when needed. This also means we can simplify the struct allocations by removing the string length from the allocation size computation, which would otherwise get even more complicated when supporting multiple strings. Cc: Linus Torvalds Cc: Rusty Russell Cc: David Howells Cc: Ming Lei Cc: Seth Forshee Cc: Kyle McMartin Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 40 ++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 62e46116d1c3..8c3aa3c2e94e 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -150,17 +150,17 @@ struct firmware_buf { int page_array_size; struct list_head pending_list; #endif - char fw_id[]; + const char *fw_id; }; struct fw_cache_entry { struct list_head list; - char name[]; + const char *name; }; struct fw_name_devm { unsigned long magic; - char name[]; + const char *name; }; #define to_fwbuf(d) container_of(d, struct firmware_buf, ref) @@ -181,13 +181,17 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name, { struct firmware_buf *buf; - buf = kzalloc(sizeof(*buf) + strlen(fw_name) + 1, GFP_ATOMIC); - + buf = kzalloc(sizeof(*buf), GFP_ATOMIC); if (!buf) - return buf; + return NULL; + + buf->fw_id = kstrdup_const(fw_name, GFP_ATOMIC); + if (!buf->fw_id) { + kfree(buf); + return NULL; + } kref_init(&buf->ref); - strcpy(buf->fw_id, fw_name); buf->fwc = fwc; init_completion(&buf->completion); #ifdef CONFIG_FW_LOADER_USER_HELPER @@ -257,6 +261,7 @@ static void __fw_free_buf(struct kref *ref) } else #endif vfree(buf->data); + kfree_const(buf->fw_id); kfree(buf); } @@ -401,6 +406,7 @@ static void fw_name_devm_release(struct device *dev, void *res) if (fwn->magic == (unsigned long)&fw_cache) pr_debug("%s: fw_name-%s devm-%p released\n", __func__, fwn->name, res); + kfree_const(fwn->name); } static int fw_devm_match(struct device *dev, void *res, @@ -431,13 +437,17 @@ static int fw_add_devm_name(struct device *dev, const char *name) if (fwn) return 1; - fwn = devres_alloc(fw_name_devm_release, sizeof(struct fw_name_devm) + - strlen(name) + 1, GFP_KERNEL); + fwn = devres_alloc(fw_name_devm_release, sizeof(struct fw_name_devm), + GFP_KERNEL); if (!fwn) return -ENOMEM; + fwn->name = kstrdup_const(name, GFP_KERNEL); + if (!fwn->name) { + kfree(fwn); + return -ENOMEM; + } fwn->magic = (unsigned long)&fw_cache; - strcpy(fwn->name, name); devres_add(dev, fwn); return 0; @@ -1397,11 +1407,16 @@ static struct fw_cache_entry *alloc_fw_cache_entry(const char *name) { struct fw_cache_entry *fce; - fce = kzalloc(sizeof(*fce) + strlen(name) + 1, GFP_ATOMIC); + fce = kzalloc(sizeof(*fce), GFP_ATOMIC); if (!fce) goto exit; - strcpy(fce->name, name); + fce->name = kstrdup_const(name, GFP_ATOMIC); + if (!fce->name) { + kfree(fce); + fce = NULL; + goto exit; + } exit: return fce; } @@ -1441,6 +1456,7 @@ found: static void free_fw_cache_entry(struct fw_cache_entry *fce) { + kfree_const(fce->name); kfree(fce); } From 36d4b29260753ad78b1ce4363145332c02519adc Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 26 May 2015 09:31:23 +0200 Subject: [PATCH 21/31] base/platform: Only insert MEM and IO resources platform_device_del only checks the type of the resource in order to call release_resource. On the other hand, platform_device_add calls insert_resource for any resource that has a parent. Make both code branches balanced. Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 063f0ab15259..46a56f694cec 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -341,19 +341,23 @@ int platform_device_add(struct platform_device *pdev) for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; + unsigned long type = resource_type(r); if (r->name == NULL) r->name = dev_name(&pdev->dev); + if (!(type == IORESOURCE_MEM || type == IORESOURCE_IO)) + continue; + p = r->parent; if (!p) { - if (resource_type(r) == IORESOURCE_MEM) + if (type == IORESOURCE_MEM) p = &iomem_resource; - else if (resource_type(r) == IORESOURCE_IO) + else if (type == IORESOURCE_IO) p = &ioport_resource; } - if (p && insert_resource(p, r)) { + if (insert_resource(p, r)) { dev_err(&pdev->dev, "failed to claim resource %d\n", i); ret = -EBUSY; goto failed; From e50e69d1ac4232af0b6890f16929bf5ceee81538 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 26 May 2015 09:31:24 +0200 Subject: [PATCH 22/31] base/platform: Continue on insert_resource() error insert_resource() can fail when the resource added overlaps (partially or fully) with another. Device tree and AMBA devices may contain resources that overlap, so they could not call platform_device_add (see 02bbde7849e6 ('Revert "of: use platform_device_add"'))" On the other hand, device trees are released using platform_device_unregister(). This function calls platform_device_del(), which calls release_resource(), that crashes when the resource has not been added with with insert_resource. This was not an issue when the device tree could not be modified online, but this is not the case anymore. This patch let the flow continue when there is an insert error, after notifying the user with a dev_err(). r->parent is set to NULL, so platform_device_del() knows that the resource was not added, and therefore it should not be released. Acked-by: Rob Herring Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 46a56f694cec..5a29387e5ff6 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -332,7 +332,7 @@ int platform_device_add(struct platform_device *pdev) */ ret = ida_simple_get(&platform_devid_ida, 0, 0, GFP_KERNEL); if (ret < 0) - goto err_out; + return ret; pdev->id = ret; pdev->id_auto = true; dev_set_name(&pdev->dev, "%s.%d.auto", pdev->name, pdev->id); @@ -340,7 +340,7 @@ int platform_device_add(struct platform_device *pdev) } for (i = 0; i < pdev->num_resources; i++) { - struct resource *p, *r = &pdev->resource[i]; + struct resource *conflict, *p, *r = &pdev->resource[i]; unsigned long type = resource_type(r); if (r->name == NULL) @@ -357,11 +357,14 @@ int platform_device_add(struct platform_device *pdev) p = &ioport_resource; } - if (insert_resource(p, r)) { - dev_err(&pdev->dev, "failed to claim resource %d\n", i); - ret = -EBUSY; - goto failed; - } + conflict = insert_resource_conflict(p, r); + if (!conflict) + continue; + + dev_err(&pdev->dev, + "ignoring resource %pR (conflicts with %s %pR)\n", + r, conflict->name, conflict); + p->parent = NULL; } pr_debug("Registering platform device '%s'. Parent at %s\n", @@ -371,7 +374,7 @@ int platform_device_add(struct platform_device *pdev) if (ret == 0) return ret; - failed: + /* Failure path */ if (pdev->id_auto) { ida_simple_remove(&platform_devid_ida, pdev->id); pdev->id = PLATFORM_DEVID_AUTO; @@ -381,11 +384,11 @@ int platform_device_add(struct platform_device *pdev) struct resource *r = &pdev->resource[i]; unsigned long type = resource_type(r); - if (type == IORESOURCE_MEM || type == IORESOURCE_IO) + if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && + r->parent) release_resource(r); } - err_out: return ret; } EXPORT_SYMBOL_GPL(platform_device_add); @@ -414,7 +417,8 @@ void platform_device_del(struct platform_device *pdev) struct resource *r = &pdev->resource[i]; unsigned long type = resource_type(r); - if (type == IORESOURCE_MEM || type == IORESOURCE_IO) + if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && + r->parent) release_resource(r); } } From b6d2233f2916fa9338786aeab2e936c5a07e4d0c Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 26 May 2015 09:31:25 +0200 Subject: [PATCH 23/31] of/platform: Use platform_device interface of_platform_device_create_pdata() was using of_device_add() to create the devices, but of_platform_device_destroy was using platform_device_unregister() to free them. of_device_add(), do not call insert_resource(), which initializes the parent field of the resource structure, needed by release_resource(), called by of_platform_device_destroy(). This leads to a NULL pointer deference. Instead of fixing the NULL pointer deference, what could hide other bugs, this patch, replaces of_device_add() with platform_device_data(). Signed-off-by: Ricardo Ribalda Delgado Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/platform.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index a01f57c9e34e..f011f57f835a 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -183,8 +183,9 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; of_dma_configure(&dev->dev, dev->dev.of_node); + dev->name = dev_name(&dev->dev); - if (of_device_add(dev) != 0) { + if (platform_device_add(dev) != 0) { of_dma_deconfigure(&dev->dev); platform_device_put(dev); goto err_clear_flag; From 6d9d4b1469b0d9748145e168fc9ec585e1f3f4b0 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 26 May 2015 09:31:26 +0200 Subject: [PATCH 24/31] base/platform: Remove code duplication Failure path of platform_device_add was almost the same as platform_device_del. Refactor same code in a function. Acked-by: Rob Herring Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 60 +++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 5a29387e5ff6..cba8e0e83bfc 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -298,6 +298,25 @@ int platform_device_add_data(struct platform_device *pdev, const void *data, } EXPORT_SYMBOL_GPL(platform_device_add_data); +static void platform_device_cleanout(struct platform_device *pdev, int n_res) +{ + int i; + + if (pdev->id_auto) { + ida_simple_remove(&platform_devid_ida, pdev->id); + pdev->id = PLATFORM_DEVID_AUTO; + } + + for (i = 0; i < n_res; i++) { + struct resource *r = &pdev->resource[i]; + unsigned long type = resource_type(r); + + if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && + r->parent) + release_resource(r); + } +} + /** * platform_device_add - add a platform device to device hierarchy * @pdev: platform device we're adding @@ -371,23 +390,8 @@ int platform_device_add(struct platform_device *pdev) dev_name(&pdev->dev), dev_name(pdev->dev.parent)); ret = device_add(&pdev->dev); - if (ret == 0) - return ret; - - /* Failure path */ - if (pdev->id_auto) { - ida_simple_remove(&platform_devid_ida, pdev->id); - pdev->id = PLATFORM_DEVID_AUTO; - } - - while (--i >= 0) { - struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && - r->parent) - release_resource(r); - } + if (ret) + platform_device_cleanout(pdev, i); return ret; } @@ -403,25 +407,11 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - int i; + if (!pdev) + return; - if (pdev) { - device_del(&pdev->dev); - - if (pdev->id_auto) { - ida_simple_remove(&platform_devid_ida, pdev->id); - pdev->id = PLATFORM_DEVID_AUTO; - } - - for (i = 0; i < pdev->num_resources; i++) { - struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && - r->parent) - release_resource(r); - } - } + device_del(&pdev->dev); + platform_device_cleanout(pdev, pdev->num_resources); } EXPORT_SYMBOL_GPL(platform_device_del); From 9ba8af66432cb8e82553f2e273eb11db0cec7d2d Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Mon, 25 May 2015 23:46:11 +0530 Subject: [PATCH 25/31] base:dd - Fix for typo in comment to function driver_deferred_probe_trigger(). Signed-off-by: Shailendra Verma Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 8da8e071f01d..a638bbb1a27a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -141,7 +141,7 @@ static bool driver_deferred_probe_enable = false; * more than one device is probing at the same time, it is possible for one * probe to complete successfully while another is about to defer. If the second * depends on the first, then it will get put on the pending list after the - * trigger event has already occured and will be stuck there. + * trigger event has already occurred and will be stuck there. * * The atomic 'deferred_trigger_count' is used to determine if a successful * trigger has occurred in the midst of probing a driver. If the trigger count From eaa5cd926345f86e9df1eb6b0490da539f5ce7d0 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 22 May 2015 00:21:16 +0300 Subject: [PATCH 26/31] fs: sysfs: don't pass count == 0 to bin file readers If count == 0 bytes are requested by a reader, sysfs_kf_bin_read() deliberately returns 0 without passing a potentially harmful value to some externally defined underlying battr->read() function. However in case of (pos == size && count) the next clause always sets count to 0 and this value is handed over to battr->read(). The change intends to make obsolete (and remove later) a redundant sanity check in battr->read(), if it is present, or add more protection to struct bin_attribute users, who does not care about input arguments. Signed-off-by: Vladimir Zapolskiy Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 7c2867b44141..6c95628ea377 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -90,7 +90,7 @@ static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, return 0; if (size) { - if (pos > size) + if (pos >= size) return 0; if (pos + count > size) count = size - pos; From 303cda0ea7c1c33701812ccb80d37083a4093c7c Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 28 May 2015 17:46:42 -0700 Subject: [PATCH 27/31] firmware: add missing kfree for work on async call The recent fix to use kstrdup_const() failed to add a kfree upon failure of name allocation... Cc: Ming Lei Cc: Seth Forshee Cc: Kyle McMartin Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 8c3aa3c2e94e..9c4288362a8e 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -1307,8 +1307,10 @@ request_firmware_nowait( fw_work->module = module; fw_work->name = kstrdup_const(name, gfp); - if (!fw_work->name) + if (!fw_work->name) { + kfree(fw_work); return -ENOMEM; + } fw_work->device = device; fw_work->context = context; fw_work->cont = cont; From 8b2dcebae330fb6dffc7717b740aa4b2c4d00451 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Jun 2015 08:36:50 -0700 Subject: [PATCH 28/31] Revert "base/platform: Remove code duplication" This reverts commit 6d9d4b1469b0d9748145e168fc9ec585e1f3f4b0 as it breaks working machines. Cc: Rob Herring Cc: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 60 ++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index cba8e0e83bfc..5a29387e5ff6 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -298,25 +298,6 @@ int platform_device_add_data(struct platform_device *pdev, const void *data, } EXPORT_SYMBOL_GPL(platform_device_add_data); -static void platform_device_cleanout(struct platform_device *pdev, int n_res) -{ - int i; - - if (pdev->id_auto) { - ida_simple_remove(&platform_devid_ida, pdev->id); - pdev->id = PLATFORM_DEVID_AUTO; - } - - for (i = 0; i < n_res; i++) { - struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && - r->parent) - release_resource(r); - } -} - /** * platform_device_add - add a platform device to device hierarchy * @pdev: platform device we're adding @@ -390,8 +371,23 @@ int platform_device_add(struct platform_device *pdev) dev_name(&pdev->dev), dev_name(pdev->dev.parent)); ret = device_add(&pdev->dev); - if (ret) - platform_device_cleanout(pdev, i); + if (ret == 0) + return ret; + + /* Failure path */ + if (pdev->id_auto) { + ida_simple_remove(&platform_devid_ida, pdev->id); + pdev->id = PLATFORM_DEVID_AUTO; + } + + while (--i >= 0) { + struct resource *r = &pdev->resource[i]; + unsigned long type = resource_type(r); + + if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && + r->parent) + release_resource(r); + } return ret; } @@ -407,11 +403,25 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - if (!pdev) - return; + int i; - device_del(&pdev->dev); - platform_device_cleanout(pdev, pdev->num_resources); + if (pdev) { + device_del(&pdev->dev); + + if (pdev->id_auto) { + ida_simple_remove(&platform_devid_ida, pdev->id); + pdev->id = PLATFORM_DEVID_AUTO; + } + + for (i = 0; i < pdev->num_resources; i++) { + struct resource *r = &pdev->resource[i]; + unsigned long type = resource_type(r); + + if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && + r->parent) + release_resource(r); + } + } } EXPORT_SYMBOL_GPL(platform_device_del); From 8171d5f7bf26849db51e8eda2bc19c92d7462606 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Jun 2015 08:37:35 -0700 Subject: [PATCH 29/31] Revert "of/platform: Use platform_device interface" This reverts commit b6d2233f2916fa9338786aeab2e936c5a07e4d0c as it breaks working machines. Cc: Rob Herring Cc: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/of/platform.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index f011f57f835a..a01f57c9e34e 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -183,9 +183,8 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; of_dma_configure(&dev->dev, dev->dev.of_node); - dev->name = dev_name(&dev->dev); - if (platform_device_add(dev) != 0) { + if (of_device_add(dev) != 0) { of_dma_deconfigure(&dev->dev); platform_device_put(dev); goto err_clear_flag; From 5da7f70997f772d7605c11d9e00018ffac463d92 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Jun 2015 08:38:02 -0700 Subject: [PATCH 30/31] Revert "base/platform: Continue on insert_resource() error" This reverts commit e50e69d1ac4232af0b6890f16929bf5ceee81538 as it breaks working machines. Cc: Rob Herring Cc: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 5a29387e5ff6..46a56f694cec 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -332,7 +332,7 @@ int platform_device_add(struct platform_device *pdev) */ ret = ida_simple_get(&platform_devid_ida, 0, 0, GFP_KERNEL); if (ret < 0) - return ret; + goto err_out; pdev->id = ret; pdev->id_auto = true; dev_set_name(&pdev->dev, "%s.%d.auto", pdev->name, pdev->id); @@ -340,7 +340,7 @@ int platform_device_add(struct platform_device *pdev) } for (i = 0; i < pdev->num_resources; i++) { - struct resource *conflict, *p, *r = &pdev->resource[i]; + struct resource *p, *r = &pdev->resource[i]; unsigned long type = resource_type(r); if (r->name == NULL) @@ -357,14 +357,11 @@ int platform_device_add(struct platform_device *pdev) p = &ioport_resource; } - conflict = insert_resource_conflict(p, r); - if (!conflict) - continue; - - dev_err(&pdev->dev, - "ignoring resource %pR (conflicts with %s %pR)\n", - r, conflict->name, conflict); - p->parent = NULL; + if (insert_resource(p, r)) { + dev_err(&pdev->dev, "failed to claim resource %d\n", i); + ret = -EBUSY; + goto failed; + } } pr_debug("Registering platform device '%s'. Parent at %s\n", @@ -374,7 +371,7 @@ int platform_device_add(struct platform_device *pdev) if (ret == 0) return ret; - /* Failure path */ + failed: if (pdev->id_auto) { ida_simple_remove(&platform_devid_ida, pdev->id); pdev->id = PLATFORM_DEVID_AUTO; @@ -384,11 +381,11 @@ int platform_device_add(struct platform_device *pdev) struct resource *r = &pdev->resource[i]; unsigned long type = resource_type(r); - if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && - r->parent) + if (type == IORESOURCE_MEM || type == IORESOURCE_IO) release_resource(r); } + err_out: return ret; } EXPORT_SYMBOL_GPL(platform_device_add); @@ -417,8 +414,7 @@ void platform_device_del(struct platform_device *pdev) struct resource *r = &pdev->resource[i]; unsigned long type = resource_type(r); - if ((type == IORESOURCE_MEM || type == IORESOURCE_IO) && - r->parent) + if (type == IORESOURCE_MEM || type == IORESOURCE_IO) release_resource(r); } } From 0e6c861f73ec42ab5c89fda9892f2173c7aaf6cf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Jun 2015 08:38:29 -0700 Subject: [PATCH 31/31] Revert "base/platform: Only insert MEM and IO resources" This reverts commit 36d4b29260753ad78b1ce4363145332c02519adc as it breaks working machines. Cc: Rob Herring Cc: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 46a56f694cec..063f0ab15259 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -341,23 +341,19 @@ int platform_device_add(struct platform_device *pdev) for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; - unsigned long type = resource_type(r); if (r->name == NULL) r->name = dev_name(&pdev->dev); - if (!(type == IORESOURCE_MEM || type == IORESOURCE_IO)) - continue; - p = r->parent; if (!p) { - if (type == IORESOURCE_MEM) + if (resource_type(r) == IORESOURCE_MEM) p = &iomem_resource; - else if (type == IORESOURCE_IO) + else if (resource_type(r) == IORESOURCE_IO) p = &ioport_resource; } - if (insert_resource(p, r)) { + if (p && insert_resource(p, r)) { dev_err(&pdev->dev, "failed to claim resource %d\n", i); ret = -EBUSY; goto failed;