From f623f75ae443d0c771635d51cc986b9d389bf631 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Aug 2018 00:35:24 +0200 Subject: [PATCH 001/242] rfkill-gpio: include linux/mod_devicetable.h One more driver is apparently broken by the recent change to linux/platform_device.h: net/rfkill/rfkill-gpio.c: In function 'rfkill_gpio_acpi_probe': net/rfkill/rfkill-gpio.c:82:29: error: dereferencing pointer to incomplete type 'const struct acpi_device_id' Include linux/mod_devicetable.h to get the definition of the acpi_device_id structure. Fixes: ac3167257b9f ("headers: separate linux/mod_devicetable.h from linux/platform_device.h") Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 00192a996be0..0f8465852254 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include From 77cfaf52eca5cac30ed029507e0cab065f888995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 13 Aug 2018 14:16:25 +0200 Subject: [PATCH 002/242] mac80211: Run TXQ teardown code before de-registering interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TXQ teardown code can reference the vif data structures that are stored in the netdev private memory area if there are still packets on the queue when it is being freed. Since the TXQ teardown code is run after the netdevs are freed, this can lead to a use-after-free. Fix this by moving the TXQ teardown code to earlier in ieee80211_unregister_hw(). Reported-by: Ben Greear Tested-by: Ben Greear Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fb73451ed85e..0358f20b675f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1182,6 +1182,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&local->ifa6_notifier); #endif + ieee80211_txq_teardown_flows(local); rtnl_lock(); @@ -1210,7 +1211,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); skb_queue_purge(&local->skb_queue_tdls_chsw); - ieee80211_txq_teardown_flows(local); destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); From 484004339d4514fde425f6e8a9f6a6cc979bb0c3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Aug 2018 18:17:03 +0200 Subject: [PATCH 003/242] mac80211_hwsim: require at least one channel Syzbot continues to try to create mac80211_hwsim radios, and manages to pass parameters that are later checked with WARN_ON in cfg80211 - catch another one in hwsim directly. Reported-by: syzbot+2a12f11c306afe871c1f@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 18e819d964f1..fe1b0108f06d 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3194,6 +3194,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_CHANNELS]) param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]); + if (param.channels < 1) { + GENL_SET_ERR_MSG(info, "must have at least one channel"); + return -EINVAL; + } + if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) { GENL_SET_ERR_MSG(info, "too many channels specified"); return -EINVAL; From 4769c003e0fcff0ee001a9102e2605bdaa5880f0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 8 Aug 2018 01:07:03 -0700 Subject: [PATCH 004/242] ARM: OMAP2+: Fix null hwmod for ti-sysc debug We may call omap_hwmod_parse_module_range() with no hwmod allocated yet and may have debug enabled. Let's fix this by checking for hwmod before trying to use it's name. Fixes: 6c72b3550672 ("ARM: OMAP2+: Parse module IO range from dts for legacy Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 2ceffd85dd3d..7f759abcf49c 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2220,7 +2220,7 @@ int omap_hwmod_parse_module_range(struct omap_hwmod *oh, size = be32_to_cpup(ranges); pr_debug("omap_hwmod: %s %s at 0x%llx size 0x%llx\n", - oh->name, np->name, base, size); + oh ? oh->name : "", np->name, base, size); res->start = base; res->end = base + size - 1; From 1dbcb97c656eed1a244c960b8b3a469c3d20ce7b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 8 Aug 2018 01:07:04 -0700 Subject: [PATCH 005/242] ARM: OMAP2+: Fix module address for modules using mpu_rt_idx If we use device tree data for a module interconnect target we want to map the control registers from the module start. Legacy hwmod platform data however is using child IP offsets for cpsw module with mpu_rt_idx. In cases where we have the interconnect target module already using device tree data with legacy hwmod platform data still around, the sysc register area is not adjusted for mpu_rt_idx causing wrong registers being accessed. Let's fix the issue for mixed dts and platform data mode by ioremapping the module registers using child IP offset if mpu_rt_idx is set. For device tree only data there's no reason to use mpu_rt_idx. Fixes: 6c72b3550672 ("ARM: OMAP2+: Parse module IO range from dts for legacy "ti,hwmods" support") Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 7f759abcf49c..cd65ea4e9c54 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2160,6 +2160,37 @@ static int of_dev_hwmod_lookup(struct device_node *np, return -ENODEV; } +/** + * omap_hwmod_fix_mpu_rt_idx - fix up mpu_rt_idx register offsets + * + * @oh: struct omap_hwmod * + * @np: struct device_node * + * + * Fix up module register offsets for modules with mpu_rt_idx. + * Only needed for cpsw with interconnect target module defined + * in device tree while still using legacy hwmod platform data + * for rev, sysc and syss registers. + * + * Can be removed when all cpsw hwmod platform data has been + * dropped. + */ +static void omap_hwmod_fix_mpu_rt_idx(struct omap_hwmod *oh, + struct device_node *np, + struct resource *res) +{ + struct device_node *child = NULL; + int error; + + child = of_get_next_child(np, child); + if (!child) + return; + + error = of_address_to_resource(child, oh->mpu_rt_idx, res); + if (error) + pr_err("%s: error mapping mpu_rt_idx: %i\n", + __func__, error); +} + /** * omap_hwmod_parse_module_range - map module IO range from device tree * @oh: struct omap_hwmod * @@ -2222,6 +2253,12 @@ int omap_hwmod_parse_module_range(struct omap_hwmod *oh, pr_debug("omap_hwmod: %s %s at 0x%llx size 0x%llx\n", oh ? oh->name : "", np->name, base, size); + if (oh && oh->mpu_rt_idx) { + omap_hwmod_fix_mpu_rt_idx(oh, np, res); + + return 0; + } + res->start = base; res->end = base + size - 1; res->flags = IORESOURCE_MEM; From 0ef8e3bb974af56346b34393e643d491d9141c66 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 8 Aug 2018 01:07:05 -0700 Subject: [PATCH 006/242] bus: ti-sysc: Fix module register ioremap for larger offsets We can have the interconnect target module control registers pretty much anywhere within the module range. The current code attempts an incomplete optimization of the ioremap size but does it wrong and it only works for registers at the beginning of the module. Let's just use the largest control register to calculate the ioremap size. The ioremapped range is for most part cached anyways so there is no need for size optimization. Let's also update the comments accordingly. Fixes: 0eecc636e5a2 ("bus: ti-sysc: Add minimal TI sysc interconnect target driver") Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 80d60f43db56..b31bf03ea497 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -490,32 +490,29 @@ static int sysc_check_registers(struct sysc *ddata) /** * syc_ioremap - ioremap register space for the interconnect target module - * @ddata: deviec driver data + * @ddata: device driver data * * Note that the interconnect target module registers can be anywhere - * within the first child device address space. For example, SGX has - * them at offset 0x1fc00 in the 32MB module address space. We just - * what we need around the interconnect target module registers. + * within the interconnect target module range. For example, SGX has + * them at offset 0x1fc00 in the 32MB module address space. And cpsw + * has them at offset 0x1200 in the CPSW_WR child. Usually the + * the interconnect target module registers are at the beginning of + * the module range though. */ static int sysc_ioremap(struct sysc *ddata) { - u32 size = 0; + int size; - if (ddata->offsets[SYSC_SYSSTATUS] >= 0) - size = ddata->offsets[SYSC_SYSSTATUS]; - else if (ddata->offsets[SYSC_SYSCONFIG] >= 0) - size = ddata->offsets[SYSC_SYSCONFIG]; - else if (ddata->offsets[SYSC_REVISION] >= 0) - size = ddata->offsets[SYSC_REVISION]; - else + size = max3(ddata->offsets[SYSC_REVISION], + ddata->offsets[SYSC_SYSCONFIG], + ddata->offsets[SYSC_SYSSTATUS]); + + if (size < 0 || (size + sizeof(u32)) > ddata->module_size) return -EINVAL; - size &= 0xfff00; - size += SZ_256; - ddata->module_va = devm_ioremap(ddata->dev, ddata->module_pa, - size); + size + sizeof(u32)); if (!ddata->module_va) return -EIO; From 8a54d8fc160e67ad485d95a0322ce1221f80770a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Jun 2018 09:29:57 +0200 Subject: [PATCH 007/242] cfg80211: remove division by size of sizeof(struct ieee80211_wmm_rule) Pointer arithmetic already adjusts by the size of the struct, so the sizeof() calculation is wrong. This is basically the same as Colin King's patch for similar code in the iwlwifi driver. Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Johannes Berg --- net/wireless/reg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4fc66a117b7d..283902974fbf 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -452,8 +452,7 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd) continue; regd->reg_rules[i].wmm_rule = d_wmm + - (src_regd->reg_rules[i].wmm_rule - s_wmm) / - sizeof(struct ieee80211_wmm_rule); + (src_regd->reg_rules[i].wmm_rule - s_wmm); } return regd; } From d3bc0fa8411c35194f99046157e2e26fe60e1d91 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 20 Aug 2018 13:55:45 +0200 Subject: [PATCH 008/242] fsnotify: fix false positive warning on inode delete When inode is getting deleted and someone else holds reference to a mark attached to the inode, we just detach the connector from the inode. In that case fsnotify_put_mark() called from fsnotify_destroy_marks() will decide to recalculate mask for the inode and __fsnotify_recalc_mask() will WARN about invalid connector type: WARNING: CPU: 1 PID: 12015 at fs/notify/mark.c:139 __fsnotify_recalc_mask+0x2d7/0x350 fs/notify/mark.c:139 Actually there's no reason to warn about detached connector in __fsnotify_recalc_mask() so just silently skip updating the mask in such case. Reported-by: syzbot+c34692a51b9a6ca93540@syzkaller.appspotmail.com Fixes: 3ac70bfcde81 ("fsnotify: add helper to get mask from connector") Signed-off-by: Jan Kara --- fs/notify/mark.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 05506d60131c..59cdb27826de 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -132,13 +132,13 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); + /* We can get detached connector here when inode is getting unlinked. */ + if (!fsnotify_valid_obj_type(conn->type)) + return; hlist_for_each_entry(mark, &conn->list, obj_list) { if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) new_mask |= mark->mask; } - if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) - return; - *fsnotify_conn_mask_p(conn) = new_mask; } From f1506a69e3e72196c7c5ce4fd420d5e1a6965ed3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 Jul 2018 16:17:49 -0300 Subject: [PATCH 009/242] thermal: qoriq: Use devm_thermal_zone_of_sensor_register() By using the managed devm_thermal_zone_of_sensor_register() we can drop the explicit call to thermal_zone_of_sensor_unregister() in the qoriq_tmu_remove() function, which simplifies the code a bit. So switch to devm_thermal_zone_of_sensor_register(). Signed-off-by: Fabio Estevam Reviewed-by: Daniel Lezcano Signed-off-by: Eduardo Valentin --- drivers/thermal/qoriq_thermal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index c866cc165960..e32d6ac79145 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -233,8 +233,9 @@ static int qoriq_tmu_probe(struct platform_device *pdev) if (ret < 0) goto err_tmu; - data->tz = thermal_zone_of_sensor_register(&pdev->dev, data->sensor_id, - data, &tmu_tz_ops); + data->tz = devm_thermal_zone_of_sensor_register(&pdev->dev, + data->sensor_id, + data, &tmu_tz_ops); if (IS_ERR(data->tz)) { ret = PTR_ERR(data->tz); dev_err(&pdev->dev, @@ -261,8 +262,6 @@ static int qoriq_tmu_remove(struct platform_device *pdev) { struct qoriq_tmu_data *data = platform_get_drvdata(pdev); - thermal_zone_of_sensor_unregister(&pdev->dev, data->tz); - /* Disable monitoring */ tmu_write(data, TMR_DISABLE, &data->regs->tmr); From 1a893a5a198eff228ddc1a364830f8928b8f9ac5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 Jul 2018 16:17:50 -0300 Subject: [PATCH 010/242] thermal: qoriq: Simplify the 'site' variable assignment There is no need to assign zero to the variable 'site' and then perform a compound bitwise OR operation afterwards. Make it simpler by assigning the final 'site' value directly. Signed-off-by: Fabio Estevam Reviewed-by: Daniel Lezcano Signed-off-by: Eduardo Valentin --- drivers/thermal/qoriq_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index e32d6ac79145..f807e4d1f72e 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -197,7 +197,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) int ret; struct qoriq_tmu_data *data; struct device_node *np = pdev->dev.of_node; - u32 site = 0; + u32 site; if (!np) { dev_err(&pdev->dev, "Device OF-Node is NULL"); @@ -244,7 +244,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) } /* Enable monitoring */ - site |= 0x1 << (15 - data->sensor_id); + site = 0x1 << (15 - data->sensor_id); tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr); return 0; From 2dfef650217c0e24754cd4c3abbb43e98131a7cf Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 28 Jul 2018 16:17:51 -0300 Subject: [PATCH 011/242] thermal: qoriq: Switch to SPDX identifier Adopt the SPDX license identifier headers to ease license compliance management. Signed-off-by: Fabio Estevam Reviewed-by: Daniel Lezcano Acked-by: Philippe Ombredanne Signed-off-by: Eduardo Valentin --- drivers/thermal/qoriq_thermal.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index f807e4d1f72e..450ed66edf58 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -1,16 +1,6 @@ -/* - * Copyright 2016 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright 2016 Freescale Semiconductor, Inc. #include #include From c954579087f4c0185206cfa777e697874b1e7d13 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 30 Jul 2018 07:56:06 +0000 Subject: [PATCH 012/242] thermal: rcar_thermal: convert to SPDX identifiers As original license mentioned, it is GPL-2.0 in SPDX. Then, MODULE_LICENSE() should be "GPL v2" instead of "GPL". See ${LINUX}/include/linux/module.h "GPL" [GNU Public License v2 or later] "GPL v2" [GNU Public License v2] Signed-off-by: Kuninori Morimoto Reviewed-by: Daniel Lezcano Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Eduardo Valentin --- drivers/thermal/rcar_thermal.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index e77e63070e99..78f932822d38 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car THS/TSC thermal sensor driver * * Copyright (C) 2012 Renesas Solutions Corp. * Kuninori Morimoto - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include #include @@ -660,6 +648,6 @@ static struct platform_driver rcar_thermal_driver = { }; module_platform_driver(rcar_thermal_driver); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("R-Car THS/TSC thermal sensor driver"); MODULE_AUTHOR("Kuninori Morimoto "); From d316522d06e9894429ace4b5f99e181bde4e7bd7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 20 Aug 2018 11:42:35 -0700 Subject: [PATCH 013/242] thermal: rcar_gen3_thermal: convert to SPDX identifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Reviewed-by: Daniel Lezcano Reviewed-by: Niklas Söderlund Reviewed-by: Simon Horman Signed-off-by: Eduardo Valentin --- drivers/thermal/rcar_gen3_thermal.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index 766521eb7071..7aed5337bdd3 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -1,19 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car Gen3 THS thermal sensor driver * Based on rcar_thermal.c and work from Hien Dang and Khiem Nguyen. * * Copyright (C) 2016 Renesas Electronics Corporation. * Copyright (C) 2016 Sang Engineering - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * */ #include #include From 152395fd03d4ce1e535a75cdbf58105e50587611 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 31 Jul 2018 00:56:49 +0800 Subject: [PATCH 014/242] thermal: of-thermal: disable passive polling when thermal zone is disabled When thermal zone is in passive mode, disabling its mode from sysfs is NOT taking effect at all, it is still polling the temperature of the disabled thermal zone and handling all thermal trips, it makes user confused. The disabling operation should disable the thermal zone behavior completely, for both active and passive mode, this patch clears the passive_delay when thermal zone is disabled and restores it when it is enabled. Signed-off-by: Anson Huang Signed-off-by: Eduardo Valentin --- drivers/thermal/of-thermal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 977a8307fbb1..4f2816559205 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -260,10 +260,13 @@ static int of_thermal_set_mode(struct thermal_zone_device *tz, mutex_lock(&tz->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED) { tz->polling_delay = data->polling_delay; - else + tz->passive_delay = data->passive_delay; + } else { tz->polling_delay = 0; + tz->passive_delay = 0; + } mutex_unlock(&tz->lock); From 09a4e0be5826aa66c4ce9954841f110ffe63ef4f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 16 Aug 2018 21:44:02 -0500 Subject: [PATCH 015/242] isofs: reject hardware sector size > 2048 bytes The largest block size supported by isofs is ISOFS_BLOCK_SIZE (2048), but isofs_fill_super calls sb_min_blocksize and sets the blocksize to the device's logical block size if it's larger than what we ended up with after option parsing. If for some reason we try to mount a hard 4k device as an isofs filesystem, we'll set opt.blocksize to 4096, and when we try to read the superblock we found via: block = iso_blknum << (ISOFS_BLOCK_BITS - s->s_blocksize_bits) with s_blocksize_bits greater than ISOFS_BLOCK_BITS, we'll have a negative shift and the bread will fail somewhat cryptically: isofs_fill_super: bread failed, dev=sda, iso_blknum=17, block=-2147483648 It seems best to just catch and clearly reject mounts of such a device. Reported-by: Bryan Gurney Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/isofs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index ec3fba7d492f..488a9e7f8f66 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "isofs.h" #include "zisofs.h" @@ -653,6 +654,12 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) /* * What if bugger tells us to go beyond page size? */ + if (bdev_logical_block_size(s->s_bdev) > 2048) { + printk(KERN_WARNING + "ISOFS: unsupported/invalid hardware sector size %d\n", + bdev_logical_block_size(s->s_bdev)); + goto out_freesbi; + } opt.blocksize = sb_min_blocksize(s, opt.blocksize); sbi->s_high_sierra = 0; /* default is iso9660 */ From 19f5e9e015675fcdbf2c20e804b2e84e80201454 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 20 Aug 2018 10:54:44 +0200 Subject: [PATCH 016/242] mmc: atmel-mci: fix bad logic of sg_copy_{from,to}_buffer conversion The conversion to sg_copy_{from,to}_buffer has been done in the wrong way. sg_copy_to_buffer is a copy from an SG list to a linear buffer so it can't replace memcpy(buf + offset, &value, remaining) where buf is the virtual address of the SG. Same for sg_copy_to_buffer but in the opposite way. Signed-off-by: Ludovic Desroches Suggested-by: Douglas Gilbert Fixes: 5b4277814e3f ("mmc: atmel-mci: use sg_copy_{from,to}_buffer") Signed-off-by: Ulf Hansson --- drivers/mmc/host/atmel-mci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 5aa2c9404e92..be53044086c7 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1976,7 +1976,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) do { value = atmci_readl(host, ATMCI_RDR); if (likely(offset + 4 <= sg->length)) { - sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset); + sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset); offset += 4; nbytes += 4; @@ -1993,7 +1993,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) } else { unsigned int remaining = sg->length - offset; - sg_pcopy_to_buffer(sg, 1, &value, remaining, offset); + sg_pcopy_from_buffer(sg, 1, &value, remaining, offset); nbytes += remaining; flush_dcache_page(sg_page(sg)); @@ -2003,7 +2003,7 @@ static void atmci_read_data_pio(struct atmel_mci *host) goto done; offset = 4 - remaining; - sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining, + sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining, offset, 0); nbytes += offset; } @@ -2042,7 +2042,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) do { if (likely(offset + 4 <= sg->length)) { - sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset); + sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset); atmci_writel(host, ATMCI_TDR, value); offset += 4; @@ -2059,7 +2059,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) unsigned int remaining = sg->length - offset; value = 0; - sg_pcopy_from_buffer(sg, 1, &value, remaining, offset); + sg_pcopy_to_buffer(sg, 1, &value, remaining, offset); nbytes += remaining; host->sg = sg = sg_next(sg); @@ -2070,7 +2070,7 @@ static void atmci_write_data_pio(struct atmel_mci *host) } offset = 4 - remaining; - sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining, + sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining, offset, 0); atmci_writel(host, ATMCI_TDR, value); nbytes += offset; From 17e96d8516e31c3cb52cb8e2ee79d1d2e6948c11 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 20 Aug 2018 10:54:45 +0200 Subject: [PATCH 017/242] mmc: android-goldfish: fix bad logic of sg_copy_{from,to}_buffer conversion The conversion to sg_copy_{from,to}_buffer has been done in the wrong way. sg_copy_to_buffer is a copy from an SG list to a linear buffer so it can't replace memcpy(dest, host->virt_base, data->sg->length) where dest is the virtual address of the SG. Same for sg_copy_from_buffer but in the opposite way. Signed-off-by: Ludovic Desroches Suggested-by: Douglas Gilbert Fixes: 53d7e098ba08 ("mmc: android-goldfish: use sg_copy_{from,to}_buffer") Signed-off-by: Ulf Hansson --- drivers/mmc/host/android-goldfish.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c index 294de177632c..61e4e2a213c9 100644 --- a/drivers/mmc/host/android-goldfish.c +++ b/drivers/mmc/host/android-goldfish.c @@ -217,7 +217,7 @@ static void goldfish_mmc_xfer_done(struct goldfish_mmc_host *host, * We don't really have DMA, so we need * to copy from our platform driver buffer */ - sg_copy_to_buffer(data->sg, 1, host->virt_base, + sg_copy_from_buffer(data->sg, 1, host->virt_base, data->sg->length); } host->data->bytes_xfered += data->sg->length; @@ -393,7 +393,7 @@ static void goldfish_mmc_prepare_data(struct goldfish_mmc_host *host, * We don't really have DMA, so we need to copy to our * platform driver buffer */ - sg_copy_from_buffer(data->sg, 1, host->virt_base, + sg_copy_to_buffer(data->sg, 1, host->virt_base, data->sg->length); } } From 26caddf274cf1e89fd4ce44ab2b8dbc7a7f97681 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Aug 2018 15:05:55 +0300 Subject: [PATCH 018/242] mmc: block: Fix unsupported parallel dispatch of requests The mmc block driver does not support parallel dispatch of requests. In normal circumstances, all requests are anyway funneled through a single work item, so parallel dispatch never happens. However it can happen if there is no elevator. Fix that by detecting if a dispatch is in progress and returning busy (BLK_STS_RESOURCE) in that case Fixes: 81196976ed94 ("mmc: block: Add blk-mq support") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 12 +++++++----- drivers/mmc/core/queue.h | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 648eb6743ed5..6edffeed9953 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -238,10 +238,6 @@ static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req, mmc_exit_request(mq->queue, req); } -/* - * We use BLK_MQ_F_BLOCKING and have only 1 hardware queue, which means requests - * will not be dispatched in parallel. - */ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -264,7 +260,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(q->queue_lock); - if (mq->recovery_needed) { + if (mq->recovery_needed || mq->busy) { spin_unlock_irq(q->queue_lock); return BLK_STS_RESOURCE; } @@ -291,6 +287,9 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, break; } + /* Parallel dispatch of requests is not supported at the moment */ + mq->busy = true; + mq->in_flight[issue_type] += 1; get_card = (mmc_tot_in_flight(mq) == 1); cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); @@ -333,9 +332,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, mq->in_flight[issue_type] -= 1; if (mmc_tot_in_flight(mq) == 0) put_card = true; + mq->busy = false; spin_unlock_irq(q->queue_lock); if (put_card) mmc_put_card(card, &mq->ctx); + } else { + WRITE_ONCE(mq->busy, false); } return ret; diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 17e59d50b496..9bf3c9245075 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -81,6 +81,7 @@ struct mmc_queue { unsigned int cqe_busy; #define MMC_CQE_DCMD_BUSY BIT(0) #define MMC_CQE_QUEUE_FULL BIT(1) + bool busy; bool use_cqe; bool recovery_needed; bool in_recovery; From 4f3530f4a41d49c41015020cd9a5ed5c95b5d2db Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 8 Aug 2018 01:07:06 -0700 Subject: [PATCH 019/242] bus: ti-sysc: Fix no_console_suspend handling If no_console_suspend is set, we should keep console enabled during suspend. Lets fix this by only producing a warning if we can't idle hardware during suspend. Fixes: ef55f8215a78 ("bus: ti-sysc: Improve suspend and resume handling") Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index b31bf03ea497..4576a1268e0e 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1175,10 +1175,10 @@ static int sysc_child_suspend_noirq(struct device *dev) if (!pm_runtime_status_suspended(dev)) { error = pm_generic_runtime_suspend(dev); if (error) { - dev_err(dev, "%s error at %i: %i\n", - __func__, __LINE__, error); + dev_warn(dev, "%s busy at %i: %i\n", + __func__, __LINE__, error); - return error; + return 0; } error = sysc_runtime_suspend(ddata->dev); From f4efa74c09a7eddcc12cd13208f78743763f6e7a Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 1 Aug 2018 22:28:21 +0200 Subject: [PATCH 020/242] ARM: dts: omap4-droid4: fix vibrations on Droid 4 Vibration GPIOs don't have anything to do with wakeup. Move it to normal section; this fixes vibrations on Droid 4. Fixes: a5effd968301 ("ARM: dts: omap4-droid4: Add vibrator") Signed-off-by: Pavel Machek Reviewed-by: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index e7c3c563ff8f..edc97f89fae4 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -618,15 +618,6 @@ OMAP4_IOPAD(0x10c, PIN_INPUT | MUX_MODE1) /* abe_mcbsp3_fsx */ >; }; -}; - -&omap4_pmx_wkup { - usb_gpio_mux_sel2: pinmux_usb_gpio_mux_sel2_pins { - /* gpio_wk0 */ - pinctrl-single,pins = < - OMAP4_IOPAD(0x040, PIN_OUTPUT_PULLDOWN | MUX_MODE3) - >; - }; vibrator_direction_pin: pinmux_vibrator_direction_pin { pinctrl-single,pins = < @@ -641,6 +632,15 @@ }; }; +&omap4_pmx_wkup { + usb_gpio_mux_sel2: pinmux_usb_gpio_mux_sel2_pins { + /* gpio_wk0 */ + pinctrl-single,pins = < + OMAP4_IOPAD(0x040, PIN_OUTPUT_PULLDOWN | MUX_MODE3) + >; + }; +}; + /* * As uart1 is wired to mdm6600 with rts and cts, we can use the cts pin for * uart1 wakeirq. From ce32d59ee2cd036f6e8a6ed17a06a0b0bec5c67c Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 25 Jul 2018 11:25:35 +0530 Subject: [PATCH 021/242] arm: dts: am4372: setup rtc as system-power-controller RTC alarm2 is connected to pmic_en line and hence can be used to control the pmic enabling/disabling. Hence add the system-power-controller for rtc node. Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am4372.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index f0cbd86312dc..d4b7c59eec68 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -469,6 +469,7 @@ ti,hwmods = "rtc"; clocks = <&clk_32768_ck>; clock-names = "int-clk"; + system-power-controller; status = "disabled"; }; From 64d9d13828c6c8e188bba63794eee923df3d69a9 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 01:37:30 +0000 Subject: [PATCH 022/242] fs/quota: Replace XQM_MAXQUOTAS usage with MAXQUOTAS XQM_MAXQUOTAS and MAXQUOTAS are, it appears, equivalent. Replace all usage of XQM_MAXQUOTAS and remove it along with the unused XQM_*QUOTA definitions. Signed-off-by: Jeremy Cline Signed-off-by: Jan Kara --- fs/quota/quota.c | 12 +++++------- include/linux/quota.h | 8 +------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 860bfbe7a07a..d403392d8a0f 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -120,8 +120,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr) struct if_dqinfo uinfo; int ret; - /* This checks whether qc_state has enough entries... */ - BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS); if (!sb->s_qcop->get_state) return -ENOSYS; ret = sb->s_qcop->get_state(sb, &state); @@ -354,10 +352,10 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) * GETXSTATE quotactl has space for just one set of time limits so * report them for the first enabled quota type */ - for (type = 0; type < XQM_MAXQUOTAS; type++) + for (type = 0; type < MAXQUOTAS; type++) if (state.s_state[type].flags & QCI_ACCT_ENABLED) break; - BUG_ON(type == XQM_MAXQUOTAS); + BUG_ON(type == MAXQUOTAS); fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -427,10 +425,10 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) * GETXSTATV quotactl has space for just one set of time limits so * report them for the first enabled quota type */ - for (type = 0; type < XQM_MAXQUOTAS; type++) + for (type = 0; type < MAXQUOTAS; type++) if (state.s_state[type].flags & QCI_ACCT_ENABLED) break; - BUG_ON(type == XQM_MAXQUOTAS); + BUG_ON(type == MAXQUOTAS); fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -701,7 +699,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, { int ret; - if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) + if (type >= MAXQUOTAS) return -EINVAL; /* * Quota not supported on this fs? Check this before s_quota_types diff --git a/include/linux/quota.h b/include/linux/quota.h index ca9772c8e48b..f32dd270b8e3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -408,13 +408,7 @@ struct qc_type_state { struct qc_state { unsigned int s_incoredqs; /* Number of dquots in core */ - /* - * Per quota type information. The array should really have - * max(MAXQUOTAS, XQM_MAXQUOTAS) entries. BUILD_BUG_ON in - * quota_getinfo() makes sure XQM_MAXQUOTAS is large enough. Once VFS - * supports project quotas, this can be changed to MAXQUOTAS - */ - struct qc_type_state s_state[XQM_MAXQUOTAS]; + struct qc_type_state s_state[MAXQUOTAS]; /* Per quota type information */ }; /* Structure for communicating via ->set_info */ From 7b6924d94a60c6b8c1279ca003e8744e6cd9e8b1 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 31 Jul 2018 01:37:31 +0000 Subject: [PATCH 023/242] fs/quota: Fix spectre gadget in do_quotactl 'type' is user-controlled, so sanitize it after the bounds check to avoid using it in speculative execution. This covers the following potential gadgets detected with the help of smatch: * fs/ext4/super.c:5741 ext4_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/ext4/super.c:5778 ext4_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1552 f2fs_quota_read() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/f2fs/super.c:1608 f2fs_quota_write() warn: potential spectre issue 'sb_dqopt(sb)->files' [r] * fs/quota/dquot.c:412 mark_info_dirty() warn: potential spectre issue 'sb_dqopt(sb)->info' [w] * fs/quota/dquot.c:933 dqinit_needed() warn: potential spectre issue 'dquots' [r] * fs/quota/dquot.c:2112 dquot_commit_info() warn: potential spectre issue 'dqopt->ops' [r] * fs/quota/dquot.c:2362 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->files' [w] (local cap) * fs/quota/dquot.c:2369 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->ops' [w] (local cap) * fs/quota/dquot.c:2370 vfs_load_quota_inode() warn: potential spectre issue 'dqopt->info' [w] (local cap) * fs/quota/quota.c:110 quota_getfmt() warn: potential spectre issue 'sb_dqopt(sb)->info' [r] * fs/quota/quota_v2.c:84 v2_check_quota_file() warn: potential spectre issue 'quota_magics' [w] * fs/quota/quota_v2.c:85 v2_check_quota_file() warn: potential spectre issue 'quota_versions' [w] * fs/quota/quota_v2.c:96 v2_read_file_info() warn: potential spectre issue 'dqopt->info' [r] * fs/quota/quota_v2.c:172 v2_write_file_info() warn: potential spectre issue 'dqopt->info' [r] Additionally, a quick inspection indicates there are array accesses with 'type' in quota_on() and quota_off() functions which are also addressed by this. Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Signed-off-by: Jan Kara --- fs/quota/quota.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index d403392d8a0f..f0cbf58ad4da 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,7 @@ #include #include #include +#include static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -701,6 +702,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, if (type >= MAXQUOTAS) return -EINVAL; + type = array_index_nospec(type, MAXQUOTAS); /* * Quota not supported on this fs? Check this before s_quota_types * since they needn't be set if quota is not supported at all. From 8604ffcbf04f8f4f3f55a9e46e5ff948b2ed4290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 16 Aug 2018 12:01:03 +0200 Subject: [PATCH 024/242] drm/amdgpu: fix VM clearing for the root PD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to figure out the address after validating the BO, not before. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ece0ac703e27..e40ca8676418 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -369,7 +369,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, uint64_t addr; int r; - addr = amdgpu_bo_gpu_offset(bo); entries = amdgpu_bo_size(bo) / 8; if (pte_support_ats) { @@ -401,6 +400,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; + addr = amdgpu_bo_gpu_offset(bo); if (ats_entries) { uint64_t ats_value; From d98ff24e8e9be3329eea7c84d5e244d0c1cd0ab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 21 Aug 2018 15:09:39 +0200 Subject: [PATCH 025/242] drm/amdgpu: fix preamble handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point the command submission can still be interrupted. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 502b94fb116a..09703c87d676 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1012,13 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (r) return r; - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) { - parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; - if (!parser->ctx->preamble_presented) { - parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; - parser->ctx->preamble_presented = true; - } - } + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + parser->job->preamble_status |= + AMDGPU_PREAMBLE_IB_PRESENT; if (parser->ring && parser->ring != ring) return -EINVAL; @@ -1241,6 +1237,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_cs_post_dependencies(p); + if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && + !p->ctx->preamble_presented) { + job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; + p->ctx->preamble_presented = true; + } + cs->out.handle = seq; job->uf_sequence = seq; From 2f40c6eac74a2a60921cdec9e9a8a57e88e31434 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Wed, 22 Aug 2018 20:18:25 +0800 Subject: [PATCH 026/242] amdgpu: fix multi-process hang issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SWDEV-146499: hang during multi vulkan process testing cause: the second frame's PREAMBLE_IB have clear-state and LOAD actions, those actions ruin the pipeline that is still doing process in the previous frame's work-load IB. fix: need insert pipeline sync if have context switch for SRIOV (because only SRIOV will report PREEMPTION flag to UMD) Signed-off-by: Monk Liu Signed-off-by: Emily Deng Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 5518e623fed2..51b5e977ca88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -164,8 +164,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } + need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || + (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; dma_fence_put(tmp); @@ -196,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } skip_preamble = ring->current_ctx == fence_ctx; - need_ctx_switch = ring->current_ctx != fence_ctx; if (job && ring->funcs->emit_cntxcntl) { if (need_ctx_switch) status |= AMDGPU_HAVE_CTX_SWITCH; From eb7e5cfced102e61814f6f3e4cb4acb9f9315760 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 22 Aug 2018 10:07:35 -0400 Subject: [PATCH 027/242] drm/amdgpu: Fix page fault and kasan warning on pci device remove. Problem: When executing echo 1 > /sys/class/drm/card0/device/remove kasan warning as bellow and page fault happen because adev->gart.pages already freed by the time amdgpu_gart_unbind is called. BUG: KASAN: user-memory-access in amdgpu_gart_unbind+0x98/0x180 [amdgpu] Write of size 8 at addr 0000000000003648 by task bash/1828 CPU: 2 PID: 1828 Comm: bash Tainted: G W O 4.18.0-rc1-dev+ #29 Hardware name: Gigabyte Technology Co., Ltd. AX370-Gaming/AX370-Gaming-CF, BIOS F3 06/19/2017 Call Trace: dump_stack+0x71/0xab kasan_report+0x109/0x390 amdgpu_gart_unbind+0x98/0x180 [amdgpu] ttm_tt_unbind+0x43/0x60 [ttm] ttm_bo_move_ttm+0x83/0x1c0 [ttm] ttm_bo_handle_move_mem+0xb97/0xd00 [ttm] ttm_bo_evict+0x273/0x530 [ttm] ttm_mem_evict_first+0x29c/0x360 [ttm] ttm_bo_force_list_clean+0xfc/0x210 [ttm] ttm_bo_clean_mm+0xe7/0x160 [ttm] amdgpu_ttm_fini+0xda/0x1d0 [amdgpu] amdgpu_bo_fini+0xf/0x60 [amdgpu] gmc_v8_0_sw_fini+0x36/0x70 [amdgpu] amdgpu_device_fini+0x2d0/0x7d0 [amdgpu] amdgpu_driver_unload_kms+0x6a/0xd0 [amdgpu] drm_dev_unregister+0x79/0x180 [drm] amdgpu_pci_remove+0x2a/0x60 [amdgpu] pci_device_remove+0x5b/0x100 device_release_driver_internal+0x236/0x360 pci_stop_bus_device+0xbf/0xf0 pci_stop_and_remove_bus_device_locked+0x16/0x30 remove_store+0xda/0xf0 kernfs_fop_write+0x186/0x220 __vfs_write+0xcc/0x330 vfs_write+0xe6/0x250 ksys_write+0xb1/0x140 do_syscall_64+0x77/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f66ebbb32c0 Fix: Split gmc_v{6,7,8,9}_0_gart_fini to postpone amdgpu_gart_fini to after memory managers are shut down since gart unbind happens as part of this procedure Signed-off-by: Andrey Grodzovsky Reviewed-by: Junwei Zhang Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 9 ++------- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++-------------- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 16 ++-------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++-------------- 4 files changed, 8 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 75317f283c69..ad151fefa41f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -632,12 +632,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev) amdgpu_gart_table_vram_unpin(adev); } -static void gmc_v6_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client) { @@ -935,8 +929,9 @@ static int gmc_v6_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - gmc_v6_0_gart_fini(adev); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 36dc367c4b45..f8d8a3a73e42 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -746,19 +746,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) amdgpu_gart_table_vram_unpin(adev); } -/** - * gmc_v7_0_gart_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - /** * gmc_v7_0_vm_decode_fault - print human readable fault info * @@ -1095,8 +1082,9 @@ static int gmc_v7_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); kfree(adev->gmc.vm_fault_info); - gmc_v7_0_gart_fini(adev); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 70fc97b59b4f..9333109b210d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -968,19 +968,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) amdgpu_gart_table_vram_unpin(adev); } -/** - * gmc_v8_0_gart_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - /** * gmc_v8_0_vm_decode_fault - print human readable fault info * @@ -1199,8 +1186,9 @@ static int gmc_v8_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); kfree(adev->gmc.vm_fault_info); - gmc_v8_0_gart_fini(adev); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); release_firmware(adev->gmc.fw); adev->gmc.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 399a5db27649..72f8018fa2a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -942,26 +942,12 @@ static int gmc_v9_0_sw_init(void *handle) return 0; } -/** - * gmc_v9_0_gart_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tears down the driver GART/VM setup (CIK). - */ -static void gmc_v9_0_gart_fini(struct amdgpu_device *adev) -{ - amdgpu_gart_table_vram_free(adev); - amdgpu_gart_fini(adev); -} - static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - gmc_v9_0_gart_fini(adev); /* * TODO: @@ -974,7 +960,9 @@ static int gmc_v9_0_sw_fini(void *handle) */ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); + amdgpu_gart_fini(adev); return 0; } From 9faf870e559a710c44e747ba20383ea82d8ac5d2 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 22 Aug 2018 21:28:01 +0300 Subject: [PATCH 028/242] mmc: renesas_sdhi_internal_dmac: fix #define RST_RESERVED_BITS The DM_CM_RST register actually has bits 0-31 defaulting to 1s and bits 32-63 defaulting to 0s -- fix off-by-one in #define RST_RESERVED_BITS. Signed-off-by: Sergei Shtylyov Reviewed-by: Wolfram Sang Fixes: 2a68ea7896e3 ("mmc: renesas-sdhi: add support for R-Car Gen3 SDHI DMAC") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 35cc0de6be67..f16677f424b9 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -45,7 +45,7 @@ /* DM_CM_RST */ #define RST_DTRANRST1 BIT(9) #define RST_DTRANRST0 BIT(8) -#define RST_RESERVED_BITS GENMASK_ULL(32, 0) +#define RST_RESERVED_BITS GENMASK_ULL(31, 0) /* DM_CM_INFO1 and DM_CM_INFO1_MASK */ #define INFO1_CLEAR 0 From d2332f887ddfba50fee93b8e1736376517c2df0c Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 22 Aug 2018 21:22:26 +0300 Subject: [PATCH 029/242] mmc: renesas_sdhi_internal_dmac: mask DMAC interrupts I have encountered an interrupt storm during the eMMC chip probing (and the chip finally didn't get detected). It turned out that U-Boot left the SDHI DMA interrupts enabled while the Linux driver didn't use those. Masking those interrupts in renesas_sdhi_internal_dmac_request_dma() gets rid of both issues... Signed-off-by: Sergei Shtylyov Reviewed-by: Wolfram Sang Fixes: 2a68ea7896e3 ("mmc: renesas-sdhi: add support for R-Car Gen3 SDHI DMAC") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index f16677f424b9..ca0b43973769 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -49,10 +49,12 @@ /* DM_CM_INFO1 and DM_CM_INFO1_MASK */ #define INFO1_CLEAR 0 +#define INFO1_MASK_CLEAR GENMASK_ULL(31, 0) #define INFO1_DTRANEND1 BIT(17) #define INFO1_DTRANEND0 BIT(16) /* DM_CM_INFO2 and DM_CM_INFO2_MASK */ +#define INFO2_MASK_CLEAR GENMASK_ULL(31, 0) #define INFO2_DTRANERR1 BIT(17) #define INFO2_DTRANERR0 BIT(16) @@ -252,6 +254,12 @@ renesas_sdhi_internal_dmac_request_dma(struct tmio_mmc_host *host, { struct renesas_sdhi *priv = host_to_priv(host); + /* Disable DMAC interrupts, we don't use them */ + renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO1_MASK, + INFO1_MASK_CLEAR); + renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO2_MASK, + INFO2_MASK_CLEAR); + /* Each value is set to non-zero to assume "enabling" each DMA */ host->chan_rx = host->chan_tx = (void *)0xdeadbeaf; From 82c82ab658655befcb6aa47cbdb98dadce1a0cfe Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Aug 2018 12:00:08 +0200 Subject: [PATCH 030/242] udf: Remove dead code from udf_find_fileset() Remove dead code and slightly simplify code in udf_find_fileset(). Signed-off-by: Jan Kara --- fs/udf/super.c | 62 +------------------------------------------------- 1 file changed, 1 insertion(+), 61 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 3040dc2a32f6..68d57b61f3af 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -764,9 +764,7 @@ static int udf_find_fileset(struct super_block *sb, struct kernel_lb_addr *root) { struct buffer_head *bh = NULL; - long lastblock; uint16_t ident; - struct udf_sb_info *sbi; if (fileset->logicalBlockNum != 0xFFFFFFFF || fileset->partitionReferenceNum != 0xFFFF) { @@ -779,69 +777,11 @@ static int udf_find_fileset(struct super_block *sb, return 1; } - } - - sbi = UDF_SB(sb); - if (!bh) { - /* Search backwards through the partitions */ - struct kernel_lb_addr newfileset; - -/* --> cvg: FIXME - is it reasonable? */ - return 1; - - for (newfileset.partitionReferenceNum = sbi->s_partitions - 1; - (newfileset.partitionReferenceNum != 0xFFFF && - fileset->logicalBlockNum == 0xFFFFFFFF && - fileset->partitionReferenceNum == 0xFFFF); - newfileset.partitionReferenceNum--) { - lastblock = sbi->s_partmaps - [newfileset.partitionReferenceNum] - .s_partition_len; - newfileset.logicalBlockNum = 0; - - do { - bh = udf_read_ptagged(sb, &newfileset, 0, - &ident); - if (!bh) { - newfileset.logicalBlockNum++; - continue; - } - - switch (ident) { - case TAG_IDENT_SBD: - { - struct spaceBitmapDesc *sp; - sp = (struct spaceBitmapDesc *) - bh->b_data; - newfileset.logicalBlockNum += 1 + - ((le32_to_cpu(sp->numOfBytes) + - sizeof(struct spaceBitmapDesc) - - 1) >> sb->s_blocksize_bits); - brelse(bh); - break; - } - case TAG_IDENT_FSD: - *fileset = newfileset; - break; - default: - newfileset.logicalBlockNum++; - brelse(bh); - bh = NULL; - break; - } - } while (newfileset.logicalBlockNum < lastblock && - fileset->logicalBlockNum == 0xFFFFFFFF && - fileset->partitionReferenceNum == 0xFFFF); - } - } - - if ((fileset->logicalBlockNum != 0xFFFFFFFF || - fileset->partitionReferenceNum != 0xFFFF) && bh) { udf_debug("Fileset at block=%u, partition=%u\n", fileset->logicalBlockNum, fileset->partitionReferenceNum); - sbi->s_partition = fileset->partitionReferenceNum; + UDF_SB(sb)->s_partition = fileset->partitionReferenceNum; udf_load_fileset(sb, bh, root); brelse(bh); return 0; From ee4af50ca94f58afc3532662779b9cf80bbe27c8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Aug 2018 14:52:34 +0200 Subject: [PATCH 031/242] udf: Fix mounting of Win7 created UDF filesystems Win7 is creating UDF filesystems with single partition with number 8192. Current partition descriptor scanning code does not handle this well as it incorrectly assumes that partition numbers will form mostly contiguous space of small numbers. This results in unmountable media due to errors like: UDF-fs: error (device dm-1): udf_read_tagged: tag version 0x0000 != 0x0002 || 0x0003, block 0 UDF-fs: warning (device dm-1): udf_fill_super: No fileset found Fix the problem by handling partition descriptors in a way that sparse partition numbering does not matter. Reported-and-tested-by: jean-luc malet CC: stable@vger.kernel.org Fixes: 7b78fd02fb19530fd101ae137a1f46aa466d9bb6 Signed-off-by: Jan Kara --- fs/udf/super.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 68d57b61f3af..6f515651a2c2 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1510,10 +1510,16 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ */ #define PART_DESC_ALLOC_STEP 32 +struct part_desc_seq_scan_data { + struct udf_vds_record rec; + u32 partnum; +}; + struct desc_seq_scan_data { struct udf_vds_record vds[VDS_POS_LENGTH]; unsigned int size_part_descs; - struct udf_vds_record *part_descs_loc; + unsigned int num_part_descs; + struct part_desc_seq_scan_data *part_descs_loc; }; static struct udf_vds_record *handle_partition_descriptor( @@ -1522,10 +1528,14 @@ static struct udf_vds_record *handle_partition_descriptor( { struct partitionDesc *desc = (struct partitionDesc *)bh->b_data; int partnum; + int i; partnum = le16_to_cpu(desc->partitionNumber); - if (partnum >= data->size_part_descs) { - struct udf_vds_record *new_loc; + for (i = 0; i < data->num_part_descs; i++) + if (partnum == data->part_descs_loc[i].partnum) + return &(data->part_descs_loc[i].rec); + if (data->num_part_descs >= data->size_part_descs) { + struct part_desc_seq_scan_data *new_loc; unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP); new_loc = kcalloc(new_size, sizeof(*new_loc), GFP_KERNEL); @@ -1537,7 +1547,7 @@ static struct udf_vds_record *handle_partition_descriptor( data->part_descs_loc = new_loc; data->size_part_descs = new_size; } - return &(data->part_descs_loc[partnum]); + return &(data->part_descs_loc[data->num_part_descs++].rec); } @@ -1587,6 +1597,7 @@ static noinline int udf_process_sequence( memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); data.size_part_descs = PART_DESC_ALLOC_STEP; + data.num_part_descs = 0; data.part_descs_loc = kcalloc(data.size_part_descs, sizeof(*data.part_descs_loc), GFP_KERNEL); @@ -1598,7 +1609,6 @@ static noinline int udf_process_sequence( * are in it. */ for (; (!done && block <= lastblock); block++) { - bh = udf_read_tagged(sb, block, block, &ident); if (!bh) break; @@ -1670,13 +1680,10 @@ static noinline int udf_process_sequence( } /* Now handle prevailing Partition Descriptors */ - for (i = 0; i < data.size_part_descs; i++) { - if (data.part_descs_loc[i].block) { - ret = udf_load_partdesc(sb, - data.part_descs_loc[i].block); - if (ret < 0) - return ret; - } + for (i = 0; i < data.num_part_descs; i++) { + ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block); + if (ret < 0) + return ret; } return 0; From 5e2e2f9f76e157063a656351728703cb02b068f1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Aug 2018 16:59:25 +0300 Subject: [PATCH 032/242] PM / clk: signedness bug in of_pm_clk_add_clks() "count" needs to be signed for the error handling to work. I made "i" signed as well so they match. Fixes: 02113ba93ea4 (PM / clk: Add support for obtaining clocks from device-tree) Cc: 4.6+ # 4.6+ Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki --- drivers/base/power/clock_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 8e2e4757adcb..5a42ae4078c2 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -185,7 +185,7 @@ EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); int of_pm_clk_add_clks(struct device *dev) { struct clk **clks; - unsigned int i, count; + int i, count; int ret; if (!dev || !dev->of_node) From a296b16270ab8d3b1c2a41ca1dd4d2fc34b598d9 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 16 Aug 2018 11:36:38 +0800 Subject: [PATCH 033/242] drm/amd/display: Fix bug use wrong pp interface Used wrong pp interface, the original interface is exposed by dpm on SI and paritial CI. Pointed out by Francis David v2: dal only need to set min_dcefclk and min_fclk to smu. so use display_clock_voltage_request interface, instand of update all display configuration. Acked-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index fbe878ae1e8c..4ba0003a9d32 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -480,12 +480,20 @@ void pp_rv_set_display_requirement(struct pp_smu *pp, { struct dc_context *ctx = pp->ctx; struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + struct pp_display_clock_request clock = {0}; - if (!pp_funcs || !pp_funcs->display_configuration_changed) + if (!pp_funcs || !pp_funcs->display_clock_voltage_request) return; - amdgpu_dpm_display_configuration_changed(adev); + clock.clock_type = amd_pp_dcf_clock; + clock.clock_freq_in_khz = req->hard_min_dcefclk_khz; + pp_funcs->display_clock_voltage_request(pp_handle, &clock); + + clock.clock_type = amd_pp_f_clock; + clock.clock_freq_in_khz = req->hard_min_fclk_khz; + pp_funcs->display_clock_voltage_request(pp_handle, &clock); } void pp_rv_set_wm_ranges(struct pp_smu *pp, From 757ab15c3f4968b5a29caf3fe8b67660ce84c3cd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Aug 2018 10:44:10 +0200 Subject: [PATCH 034/242] cpuidle: menu: Retain tick when shallow state is selected The case addressed by commit 5ef499cd571c (cpuidle: menu: Handle stopped tick more aggressively) in the stopped tick case is present when the tick has not been stopped yet too. Namely, if only two CPU idle states, shallow state A with target residency significantly below the tick boundary and deep state B with target residency significantly above it, are available and the predicted idle duration is above the tick boundary, but below the target residency of state B, state A will be selected and the CPU may spend indefinite amount of time in it, which is not quite energy-efficient. However, if the tick has not been stopped yet and the governor is about to select a shallow idle state for the CPU even though the idle duration predicted by it is above the tick boundary, it should be fine to wake up the CPU early, so the tick can be retained then and the governor will have a chance to select a deeper state when it runs next time. [Note that when this really happens, it will make the idle duration predictor believe that the CPU might be idle longer than predicted, which will make it more likely to predict longer idle durations going forward, but that will also cause deeper idle states to be selected going forward, on average, which is what's needed here.] Fixes: 87c9fe6ee495 (cpuidle: menu: Avoid selecting shallow states with stopped tick) Reported-by: Leo Yan Cc: 4.17+ # 4.17+: 5ef499cd571c (cpuidle: menu: Handle ...) Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 110483f0e3fb..e26a40971b26 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -379,9 +379,20 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == -1) idx = i; /* first enabled state */ if (s->target_residency > data->predicted_us) { - if (!tick_nohz_tick_stopped()) + if (data->predicted_us < TICK_USEC) break; + if (!tick_nohz_tick_stopped()) { + /* + * If the state selected so far is shallow, + * waking up early won't hurt, so retain the + * tick in that case and let the governor run + * again in the next iteration of the loop. + */ + expected_interval = drv->states[idx].target_residency; + break; + } + /* * If the state selected so far is shallow and this * state's target residency matches the time till the From cc98963dbaaea93d17608641b8d6942a5327fc31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 6 Aug 2018 15:29:09 +0300 Subject: [PATCH 035/242] crypto: caam/jr - fix descriptor DMA unmapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Descriptor address needs to be swapped to CPU endianness before being DMA unmapped. Cc: # 4.8+ Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reported-by: Laurentiu Tudor Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/jr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index f4f258075b89..acdd72016ffe 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -190,7 +190,8 @@ static void caam_jr_dequeue(unsigned long devarg) BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0); /* Unmap just-run descriptor so we can post-process */ - dma_unmap_single(dev, jrp->outring[hw_idx].desc, + dma_unmap_single(dev, + caam_dma_to_cpu(jrp->outring[hw_idx].desc), jrp->entinfo[sw_idx].desc_size, DMA_TO_DEVICE); From ad876a18048f43b1f66f5d474b7598538668c5de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 6 Aug 2018 15:29:39 +0300 Subject: [PATCH 036/242] crypto: caam/qi - fix error path in xts setkey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xts setkey callback returns 0 on some error paths. Fix this by returning -EINVAL. Cc: # 4.12+ Fixes: b189817cf789 ("crypto: caam/qi - add ablkcipher and authenc algorithms") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_qi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 6e61cc93c2b0..d7aa7d7ff102 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -679,10 +679,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, int ret = 0; if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) { - crypto_ablkcipher_set_flags(ablkcipher, - CRYPTO_TFM_RES_BAD_KEY_LEN); dev_err(jrdev, "key size mismatch\n"); - return -EINVAL; + goto badkey; } ctx->cdata.keylen = keylen; @@ -715,7 +713,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return ret; badkey: crypto_ablkcipher_set_flags(ablkcipher, CRYPTO_TFM_RES_BAD_KEY_LEN); - return 0; + return -EINVAL; } /* From f1bf9e60a0779ec97de9ecdc353e1d01cdd73f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 6 Aug 2018 15:29:55 +0300 Subject: [PATCH 037/242] crypto: caam - fix DMA mapping direction for RSA forms 2 & 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crypto engine needs some temporary locations in external memory for running RSA decrypt forms 2 and 3 (CRT). These are named "tmp1" and "tmp2" in the PDB. Update DMA mapping direction of tmp1 and tmp2 from TO_DEVICE to BIDIRECTIONAL, since engine needs r/w access. Cc: # 4.13+ Fixes: 52e26d77b8b3 ("crypto: caam - add support for RSA key form 2") Fixes: 4a651b122adb ("crypto: caam - add support for RSA key form 3") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 578ea63a3109..f26d62e5533a 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -71,8 +71,8 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, @@ -90,8 +90,8 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE); dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); - dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_BIDIRECTIONAL); } /* RSA Job Completion handler */ @@ -417,13 +417,13 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, goto unmap_p; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_q; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -451,7 +451,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_q: dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); unmap_p: @@ -504,13 +504,13 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, goto unmap_dq; } - pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp1_dma)) { dev_err(dev, "Unable to map RSA tmp1 memory\n"); goto unmap_qinv; } - pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, pdb->tmp2_dma)) { dev_err(dev, "Unable to map RSA tmp2 memory\n"); goto unmap_tmp1; @@ -538,7 +538,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, return 0; unmap_tmp1: - dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_BIDIRECTIONAL); unmap_qinv: dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); unmap_dq: From 7fa885e2a22fd0f91a2c23d9275f5021f618ff5a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 7 Aug 2018 23:18:36 +0200 Subject: [PATCH 038/242] crypto: arm64/sm4-ce - check for the right CPU feature bit ARMv8.2 specifies special instructions for the SM3 cryptographic hash and the SM4 symmetric cipher. While it is unlikely that a core would implement one and not the other, we should only use SM4 instructions if the SM4 CPU feature bit is set, and we currently check the SM3 feature bit instead. So fix that. Fixes: e99ce921c468 ("crypto: arm64 - add support for SM4...") Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sm4-ce-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index b7fb5274b250..0c4fc223f225 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -69,5 +69,5 @@ static void __exit sm4_ce_mod_fini(void) crypto_unregister_alg(&sm4_ce_alg); } -module_cpu_feature_match(SM3, sm4_ce_mod_init); +module_cpu_feature_match(SM4, sm4_ce_mod_init); module_exit(sm4_ce_mod_fini); From 65b2c12dcdb883fc015c0ec65d6c2f857e0456ac Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 10 Aug 2018 18:27:41 +0530 Subject: [PATCH 039/242] crypto: chtls - fix null dereference chtls_free_uld() call chtls_free_uld() only for the initialized cdev, this fixes NULL dereference in chtls_free_uld() Signed-off-by: Ganesh Goudar Signed-off-by: Atul Gupta Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chtls/chtls.h | 5 +++++ drivers/crypto/chelsio/chtls/chtls_main.c | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h index a53a0e6ba024..7725b6ee14ef 100644 --- a/drivers/crypto/chelsio/chtls/chtls.h +++ b/drivers/crypto/chelsio/chtls/chtls.h @@ -96,6 +96,10 @@ enum csk_flags { CSK_CONN_INLINE, /* Connection on HW */ }; +enum chtls_cdev_state { + CHTLS_CDEV_STATE_UP = 1 +}; + struct listen_ctx { struct sock *lsk; struct chtls_dev *cdev; @@ -146,6 +150,7 @@ struct chtls_dev { unsigned int send_page_order; int max_host_sndbuf; struct key_map kmap; + unsigned int cdev_state; }; struct chtls_hws { diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index 9b07f9165658..f59b044ebd25 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -160,6 +160,7 @@ static void chtls_register_dev(struct chtls_dev *cdev) tlsdev->hash = chtls_create_hash; tlsdev->unhash = chtls_destroy_hash; tls_register_device(&cdev->tlsdev); + cdev->cdev_state = CHTLS_CDEV_STATE_UP; } static void chtls_unregister_dev(struct chtls_dev *cdev) @@ -281,8 +282,10 @@ static void chtls_free_all_uld(void) struct chtls_dev *cdev, *tmp; mutex_lock(&cdev_mutex); - list_for_each_entry_safe(cdev, tmp, &cdev_list, list) - chtls_free_uld(cdev); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list) { + if (cdev->cdev_state == CHTLS_CDEV_STATE_UP) + chtls_free_uld(cdev); + } mutex_unlock(&cdev_mutex); } From e5b954e8d11fdde55eed35017370a3a0d8837754 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 15 Aug 2018 10:29:42 -0700 Subject: [PATCH 040/242] crypto: aesni - Use unaligned loads from gcm_context_data A regression was reported bisecting to 1476db2d12 "Move HashKey computation from stack to gcm_context". That diff moved HashKey computation from the stack, which was explicitly aligned in the asm, to a struct provided from the C code, depending on AESNI_ALIGN_ATTR for alignment. It appears some compilers may not align this struct correctly, resulting in a crash on the movdqa instruction when attempting to encrypt or decrypt data. Fix by using unaligned loads for the HashKeys. On modern hardware there is no perf difference between the unaligned and aligned loads. All other accesses to gcm_context_data already use unaligned loads. Reported-by: Mauro Rossi Fixes: 1476db2d12 ("Move HashKey computation from stack to gcm_context") Cc: Signed-off-by: Dave Watson Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 66 +++++++++++++++---------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index e762ef417562..d27a50656aa1 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -223,34 +223,34 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff pcmpeqd TWOONE(%rip), \TMP2 pand POLY(%rip), \TMP2 pxor \TMP2, \TMP3 - movdqa \TMP3, HashKey(%arg2) + movdqu \TMP3, HashKey(%arg2) movdqa \TMP3, \TMP5 pshufd $78, \TMP3, \TMP1 pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%arg2) + movdqu \TMP1, HashKey_k(%arg2) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 # TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%arg2) + movdqu \TMP5, HashKey_2(%arg2) # HashKey_2 = HashKey^2<<1 (mod poly) pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%arg2) + movdqu \TMP1, HashKey_2_k(%arg2) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 # TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%arg2) + movdqu \TMP5, HashKey_3(%arg2) pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%arg2) + movdqu \TMP1, HashKey_3_k(%arg2) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 # TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%arg2) + movdqu \TMP5, HashKey_4(%arg2) pshufd $78, \TMP5, \TMP1 pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%arg2) + movdqu \TMP1, HashKey_4_k(%arg2) .endm # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. @@ -271,7 +271,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, - movdqa HashKey(%arg2), %xmm13 + movdqu HashKey(%arg2), %xmm13 CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ %xmm4, %xmm5, %xmm6 @@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%arg2), \TMP5 + movdqu HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%arg2), \TMP5 + movdqu HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%arg2), \TMP5 + movdqu HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%arg2), \TMP5 + movdqu HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%arg2), \TMP5 + movdqu HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%arg2), \TMP5 + movdqu HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%arg2), \TMP5 + movdqu HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@: AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%arg2), \TMP5 + movdqu HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK @@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%arg2), \TMP5 + movdqu HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%arg2), \TMP5 + movdqu HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%arg2), \TMP5 + movdqu HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%arg2), \TMP5 + movdqu HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%arg2), \TMP5 + movdqu HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%arg2), \TMP5 + movdqu HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%arg2), \TMP5 + movdqu HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@: AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%arg2), \TMP5 + movdqu HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK @@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM1, \TMP6 pshufd $78, \XMM1, \TMP2 pxor \XMM1, \TMP2 - movdqa HashKey_4(%arg2), \TMP5 + movdqu HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 - movdqa HashKey_4_k(%arg2), \TMP4 + movdqu HashKey_4_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqa \XMM1, \XMMDst movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 @@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM2, \TMP1 pshufd $78, \XMM2, \TMP2 pxor \XMM2, \TMP2 - movdqa HashKey_3(%arg2), \TMP5 + movdqu HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 - movdqa HashKey_3_k(%arg2), \TMP4 + movdqu HashKey_3_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM2, \XMMDst @@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM3, \TMP1 pshufd $78, \XMM3, \TMP2 pxor \XMM3, \TMP2 - movdqa HashKey_2(%arg2), \TMP5 + movdqu HashKey_2(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 - movdqa HashKey_2_k(%arg2), \TMP4 + movdqu HashKey_2_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM3, \XMMDst @@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM4, \TMP1 pshufd $78, \XMM4, \TMP2 pxor \XMM4, \TMP2 - movdqa HashKey(%arg2), \TMP5 + movdqu HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 - movdqa HashKey_k(%arg2), \TMP4 + movdqu HashKey_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM4, \XMMDst From c2b24c36e0a30ebd8fc7d068da7f0451f2c05c76 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Aug 2018 16:58:34 +0200 Subject: [PATCH 041/242] crypto: arm64/aes-gcm-ce - fix scatterwalk API violation Commit 71e52c278c54 ("crypto: arm64/aes-ce-gcm - operate on two input blocks at a time") modified the granularity at which the AES/GCM code processes its input to allow subsequent changes to be applied that improve performance by using aggregation to process multiple input blocks at once. For this reason, it doubled the algorithm's 'chunksize' property to 2 x AES_BLOCK_SIZE, but retained the non-SIMD fallback path that processes a single block at a time. In some cases, this violates the skcipher scatterwalk API, by calling skcipher_walk_done() with a non-zero residue value for a chunk that is expected to be handled in its entirety. This results in a WARN_ON() to be hit by the TLS self test code, but is likely to break other user cases as well. Unfortunately, none of the current test cases exercises this exact code path at the moment. Fixes: 71e52c278c54 ("crypto: arm64/aes-ce-gcm - operate on two ...") Reported-by: Vakul Garg Signed-off-by: Ard Biesheuvel Tested-by: Vakul Garg Signed-off-by: Herbert Xu --- arch/arm64/crypto/ghash-ce-glue.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 6e9f33d14930..067d8937d5af 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -417,7 +417,7 @@ static int gcm_encrypt(struct aead_request *req) __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - while (walk.nbytes >= AES_BLOCK_SIZE) { + while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; @@ -437,11 +437,18 @@ static int gcm_encrypt(struct aead_request *req) NULL); err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); + walk.nbytes % (2 * AES_BLOCK_SIZE)); } - if (walk.nbytes) + if (walk.nbytes) { __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv, nrounds); + if (walk.nbytes > AES_BLOCK_SIZE) { + crypto_inc(iv, AES_BLOCK_SIZE); + __aes_arm64_encrypt(ctx->aes_key.key_enc, + ks + AES_BLOCK_SIZE, iv, + nrounds); + } + } } /* handle the tail */ @@ -545,7 +552,7 @@ static int gcm_decrypt(struct aead_request *req) __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); put_unaligned_be32(2, iv + GCM_IV_SIZE); - while (walk.nbytes >= AES_BLOCK_SIZE) { + while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; @@ -564,11 +571,21 @@ static int gcm_decrypt(struct aead_request *req) } while (--blocks > 0); err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); + walk.nbytes % (2 * AES_BLOCK_SIZE)); } - if (walk.nbytes) + if (walk.nbytes) { + if (walk.nbytes > AES_BLOCK_SIZE) { + u8 *iv2 = iv + AES_BLOCK_SIZE; + + memcpy(iv2, iv, AES_BLOCK_SIZE); + crypto_inc(iv2, AES_BLOCK_SIZE); + + __aes_arm64_encrypt(ctx->aes_key.key_enc, iv2, + iv2, nrounds); + } __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv, nrounds); + } } /* handle the tail */ From 0522236d4f9c5ab2e79889cb020d1acbe5da416e Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 22 Aug 2018 08:26:31 +0200 Subject: [PATCH 042/242] crypto: vmx - Fix sleep-in-atomic bugs This patch fixes sleep-in-atomic bugs in AES-CBC and AES-XTS VMX implementations. The problem is that the blkcipher_* functions should not be called in atomic context. The bugs can be reproduced via the AF_ALG interface by trying to encrypt/decrypt sufficiently large buffers (at least 64 KiB) using the VMX implementations of 'cbc(aes)' or 'xts(aes)'. Such operations then trigger BUG in crypto_yield(): [ 891.863680] BUG: sleeping function called from invalid context at include/crypto/algapi.h:424 [ 891.864622] in_atomic(): 1, irqs_disabled(): 0, pid: 12347, name: kcapi-enc [ 891.864739] 1 lock held by kcapi-enc/12347: [ 891.864811] #0: 00000000f5d42c46 (sk_lock-AF_ALG){+.+.}, at: skcipher_recvmsg+0x50/0x530 [ 891.865076] CPU: 5 PID: 12347 Comm: kcapi-enc Not tainted 4.19.0-0.rc0.git3.1.fc30.ppc64le #1 [ 891.865251] Call Trace: [ 891.865340] [c0000003387578c0] [c000000000d67ea4] dump_stack+0xe8/0x164 (unreliable) [ 891.865511] [c000000338757910] [c000000000172a58] ___might_sleep+0x2f8/0x310 [ 891.865679] [c000000338757990] [c0000000006bff74] blkcipher_walk_done+0x374/0x4a0 [ 891.865825] [c0000003387579e0] [d000000007e73e70] p8_aes_cbc_encrypt+0x1c8/0x260 [vmx_crypto] [ 891.865993] [c000000338757ad0] [c0000000006c0ee0] skcipher_encrypt_blkcipher+0x60/0x80 [ 891.866128] [c000000338757b10] [c0000000006ec504] skcipher_recvmsg+0x424/0x530 [ 891.866283] [c000000338757bd0] [c000000000b00654] sock_recvmsg+0x74/0xa0 [ 891.866403] [c000000338757c10] [c000000000b00f64] ___sys_recvmsg+0xf4/0x2f0 [ 891.866515] [c000000338757d90] [c000000000b02bb8] __sys_recvmsg+0x68/0xe0 [ 891.866631] [c000000338757e30] [c00000000000bbe4] system_call+0x5c/0x70 Fixes: 8c755ace357c ("crypto: vmx - Adding CBC routines for VMX module") Fixes: c07f5d3da643 ("crypto: vmx - Adding support for XTS") Cc: stable@vger.kernel.org Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_cbc.c | 30 ++++++++++++++---------------- drivers/crypto/vmx/aes_xts.c | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 5285ece4f33a..b71895871be3 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -107,24 +107,23 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; @@ -147,24 +146,23 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 8bd9aff0f55f..e9954a7d4694 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -116,32 +116,39 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); skcipher_request_zero(req); } else { + blkcipher_walk_init(&walk, dst, src, nbytes); + + ret = blkcipher_walk_virt(desc, &walk); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); - blkcipher_walk_init(&walk, dst, src, nbytes); - - ret = blkcipher_walk_virt(desc, &walk); iv = walk.iv; memset(tweak, 0, AES_BLOCK_SIZE); aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + while ((nbytes = walk.nbytes)) { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); if (enc) aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); else aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); } return ret; } From 3d7c82060d1fe65bde4023aac41a0b1bd7718e07 Mon Sep 17 00:00:00 2001 From: Srikanth Jampala Date: Wed, 22 Aug 2018 12:40:52 +0530 Subject: [PATCH 043/242] crypto: cavium/nitrox - fix for command corruption in queue full case with backlog submissions. Earlier used to post the current command without checking queue full after backlog submissions. So, post the current command only after confirming the space in queue after backlog submissions. Maintain host write index instead of reading device registers to get the next free slot to post the command. Return -ENOSPC in queue full case. Signed-off-by: Srikanth Jampala Reviewed-by: Gadam Sreerama Tested-by: Jha, Chandan Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_dev.h | 3 +- drivers/crypto/cavium/nitrox/nitrox_lib.c | 1 + drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 57 +++++++++++--------- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h index 9a476bb6d4c7..af596455b420 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_dev.h +++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h @@ -35,6 +35,7 @@ struct nitrox_cmdq { /* requests in backlog queues */ atomic_t backlog_count; + int write_idx; /* command size 32B/64B */ u8 instr_size; u8 qno; @@ -87,7 +88,7 @@ struct nitrox_bh { struct bh_data *slc; }; -/* NITROX-5 driver state */ +/* NITROX-V driver state */ #define NITROX_UCODE_LOADED 0 #define NITROX_READY 1 diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c index ebe267379ac9..4d31df07777f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_lib.c +++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c @@ -36,6 +36,7 @@ static int cmdq_common_init(struct nitrox_cmdq *cmdq) cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN); cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN); cmdq->qsize = (qsize + PKT_IN_ALIGN); + cmdq->write_idx = 0; spin_lock_init(&cmdq->response_lock); spin_lock_init(&cmdq->cmdq_lock); diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index deaefd532aaa..4a362fc22f62 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -42,6 +42,16 @@ * Invalid flag options in AES-CCM IV. */ +static inline int incr_index(int index, int count, int max) +{ + if ((index + count) >= max) + index = index + count - max; + else + index += count; + + return index; +} + /** * dma_free_sglist - unmap and free the sg lists. * @ndev: N5 device @@ -426,30 +436,29 @@ static void post_se_instr(struct nitrox_softreq *sr, struct nitrox_cmdq *cmdq) { struct nitrox_device *ndev = sr->ndev; - union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell; - u64 offset; + int idx; u8 *ent; spin_lock_bh(&cmdq->cmdq_lock); - /* get the next write offset */ - offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno); - pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset); + idx = cmdq->write_idx; /* copy the instruction */ - ent = cmdq->head + pkt_in_baoff_dbell.s.aoff; + ent = cmdq->head + (idx * cmdq->instr_size); memcpy(ent, &sr->instr, cmdq->instr_size); - /* flush the command queue updates */ - dma_wmb(); - sr->tstamp = jiffies; atomic_set(&sr->status, REQ_POSTED); response_list_add(sr, cmdq); + sr->tstamp = jiffies; + /* flush the command queue updates */ + dma_wmb(); /* Ring doorbell with count 1 */ writeq(1, cmdq->dbell_csr_addr); /* orders the doorbell rings */ mmiowb(); + cmdq->write_idx = incr_index(idx, 1, ndev->qlen); + spin_unlock_bh(&cmdq->cmdq_lock); } @@ -459,6 +468,9 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) struct nitrox_softreq *sr, *tmp; int ret = 0; + if (!atomic_read(&cmdq->backlog_count)) + return 0; + spin_lock_bh(&cmdq->backlog_lock); list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) { @@ -466,7 +478,7 @@ static int post_backlog_cmds(struct nitrox_cmdq *cmdq) /* submit until space available */ if (unlikely(cmdq_full(cmdq, ndev->qlen))) { - ret = -EBUSY; + ret = -ENOSPC; break; } /* delete from backlog list */ @@ -491,23 +503,20 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) { struct nitrox_cmdq *cmdq = sr->cmdq; struct nitrox_device *ndev = sr->ndev; - int ret = -EBUSY; + + /* try to post backlog requests */ + post_backlog_cmds(cmdq); if (unlikely(cmdq_full(cmdq, ndev->qlen))) { if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -EAGAIN; - + return -ENOSPC; + /* add to backlog list */ backlog_list_add(sr, cmdq); - } else { - ret = post_backlog_cmds(cmdq); - if (ret) { - backlog_list_add(sr, cmdq); - return ret; - } - post_se_instr(sr, cmdq); - ret = -EINPROGRESS; + return -EBUSY; } - return ret; + post_se_instr(sr, cmdq); + + return -EINPROGRESS; } /** @@ -624,11 +633,9 @@ int nitrox_process_se_request(struct nitrox_device *ndev, */ sr->instr.fdata[0] = *((u64 *)&req->gph); sr->instr.fdata[1] = 0; - /* flush the soft_req changes before posting the cmd */ - wmb(); ret = nitrox_enqueue_request(sr); - if (ret == -EAGAIN) + if (ret == -ENOSPC) goto send_fail; return ret; From 3ad867001c91657c46dcf6656d52eb6080286fd5 Mon Sep 17 00:00:00 2001 From: Lothar Felten Date: Tue, 14 Aug 2018 09:09:37 +0200 Subject: [PATCH 044/242] hwmon: (ina2xx) fix sysfs shunt resistor read access fix the sysfs shunt resistor read access: return the shunt resistor value, not the calibration register contents. update email address Signed-off-by: Lothar Felten Signed-off-by: Guenter Roeck --- Documentation/hwmon/ina2xx | 2 +- drivers/hwmon/ina2xx.c | 13 +++++++++++-- include/linux/platform_data/ina2xx.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx index 72d16f08e431..b8df81f6d6bc 100644 --- a/Documentation/hwmon/ina2xx +++ b/Documentation/hwmon/ina2xx @@ -32,7 +32,7 @@ Supported chips: Datasheet: Publicly available at the Texas Instruments website http://www.ti.com/ -Author: Lothar Felten +Author: Lothar Felten Description ----------- diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index e9e6aeabbf84..71d3445ba869 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -17,7 +17,7 @@ * Bi-directional Current/Power Monitor with I2C Interface * Datasheet: http://www.ti.com/product/ina230 * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * Thanks to Jan Volkering * * This program is free software; you can redistribute it and/or modify @@ -329,6 +329,15 @@ static int ina2xx_set_shunt(struct ina2xx_data *data, long val) return 0; } +static ssize_t ina2xx_show_shunt(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%li\n", data->rshunt); +} + static ssize_t ina2xx_store_shunt(struct device *dev, struct device_attribute *da, const char *buf, size_t count) @@ -403,7 +412,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_store_shunt, + ina2xx_show_shunt, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h index 9abc0ca7259b..9f0aa1b48c78 100644 --- a/include/linux/platform_data/ina2xx.h +++ b/include/linux/platform_data/ina2xx.h @@ -1,7 +1,7 @@ /* * Driver for Texas Instruments INA219, INA226 power monitor chips * - * Copyright (C) 2012 Lothar Felten + * Copyright (C) 2012 Lothar Felten * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as From 9d19371df50a73301aec66a479b490587e889055 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Aug 2018 12:12:36 +0300 Subject: [PATCH 045/242] hwmon: (adt7475) Potential error pointer dereferences The adt7475_update_device() function returns error pointers. The problem is that in show_pwmfreq() we dereference it before the check. And then in pwm_use_point2_pwm_at_crit_show() there isn't a check at all. I don't know if it's required, but it silences a static checker warning and it's doesn't hurt anything to check. Signed-off-by: Dan Carpenter Reviewed-by: Tokunori Ikegami Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7475.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 90837f7c7d0f..16045149f3db 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -962,13 +962,14 @@ static ssize_t show_pwmfreq(struct device *dev, struct device_attribute *attr, { struct adt7475_data *data = adt7475_update_device(dev); struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); - int i = clamp_val(data->range[sattr->index] & 0xf, 0, - ARRAY_SIZE(pwmfreq_table) - 1); + int idx; if (IS_ERR(data)) return PTR_ERR(data); + idx = clamp_val(data->range[sattr->index] & 0xf, 0, + ARRAY_SIZE(pwmfreq_table) - 1); - return sprintf(buf, "%d\n", pwmfreq_table[i]); + return sprintf(buf, "%d\n", pwmfreq_table[idx]); } static ssize_t set_pwmfreq(struct device *dev, struct device_attribute *attr, @@ -1004,6 +1005,10 @@ static ssize_t pwm_use_point2_pwm_at_crit_show(struct device *dev, char *buf) { struct adt7475_data *data = adt7475_update_device(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + return sprintf(buf, "%d\n", !!(data->config4 & CONFIG4_MAXDUTY)); } From f196dec6d50abb2e65fb54a0621b2f1b4d922995 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Aug 2018 13:07:47 +0300 Subject: [PATCH 046/242] hwmon: (adt7475) Make adt7475_read_word() return errors The adt7475_read_word() function was meant to return negative error codes on failure. Signed-off-by: Dan Carpenter Reviewed-by: Tokunori Ikegami Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7475.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 16045149f3db..f4c7516eb989 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -302,14 +302,18 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) return clamp_val(reg, 0, 1023) & (0xff << 2); } -static u16 adt7475_read_word(struct i2c_client *client, int reg) +static int adt7475_read_word(struct i2c_client *client, int reg) { - u16 val; + int val1, val2; - val = i2c_smbus_read_byte_data(client, reg); - val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8); + val1 = i2c_smbus_read_byte_data(client, reg); + if (val1 < 0) + return val1; + val2 = i2c_smbus_read_byte_data(client, reg + 1); + if (val2 < 0) + return val2; - return val; + return val1 | (val2 << 8); } static void adt7475_write_word(struct i2c_client *client, int reg, u16 val) From d49dbfade96d5b0863ca8a90122a805edd5ef50a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 15 Aug 2018 08:14:37 -0500 Subject: [PATCH 047/242] hwmon: (nct6775) Fix potential Spectre v1 val can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: vers/hwmon/nct6775.c:2698 store_pwm_weight_temp_sel() warn: potential spectre issue 'data->temp_src' [r] Fix this by sanitizing val before using it to index data->temp_src Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index c6bd61e4695a..944f5b63aecd 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "lm75.h" #define USE_ALTERNATE @@ -2689,6 +2690,7 @@ store_pwm_weight_temp_sel(struct device *dev, struct device_attribute *attr, return err; if (val > NUM_TEMP) return -EINVAL; + val = array_index_nospec(val, NUM_TEMP + 1); if (val && (!(data->have_temp & BIT(val - 1)) || !data->temp_src[val - 1])) return -EINVAL; From c7c09dc187f0323ad40b5b6c57a6db673a386a7f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 24 Aug 2018 11:29:27 +0800 Subject: [PATCH 048/242] nios2: kconfig: remove duplicate DEBUG_STACK_USAGE symbol defintions DEBUG_STACK_USAGE is already defined in lib/Kconfig.debug Signed-off-by: Tobias Klauser Signed-off-by: Ley Foon Tan --- arch/nios2/Kconfig.debug | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug index 7a49f0d28d14..f1da8a7b17ff 100644 --- a/arch/nios2/Kconfig.debug +++ b/arch/nios2/Kconfig.debug @@ -3,15 +3,6 @@ config TRACE_IRQFLAGS_SUPPORT def_bool y -config DEBUG_STACK_USAGE - bool "Enable stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T and sysrq-P debug output. - - This option will slow down process creation somewhat. - config EARLY_PRINTK bool "Activate early kernel debugging" default y From 2b7bd20d5605e0314d677ea21b462543e73e466c Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sat, 28 Jul 2018 19:08:45 +0530 Subject: [PATCH 049/242] drm/mediatek: Convert drm_atomic_helper_suspend/resume() convert drm_atomic_helper_suspend/resume() to use drm_mode_config_helper_suspend/resume(). Signed-off-by: Souptick Joarder Signed-off-by: Ajit Negi Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 39721119713b..b68922a793cb 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -580,29 +580,24 @@ static int mtk_drm_sys_suspend(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); struct drm_device *drm = private->drm; + int ret; - drm_kms_helper_poll_disable(drm); - - private->suspend_state = drm_atomic_helper_suspend(drm); - if (IS_ERR(private->suspend_state)) { - drm_kms_helper_poll_enable(drm); - return PTR_ERR(private->suspend_state); - } - + ret = drm_mode_config_helper_suspend(drm); DRM_DEBUG_DRIVER("mtk_drm_sys_suspend\n"); - return 0; + + return ret; } static int mtk_drm_sys_resume(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); struct drm_device *drm = private->drm; + int ret; - drm_atomic_helper_resume(drm, private->suspend_state); - drm_kms_helper_poll_enable(drm); - + ret = drm_mode_config_helper_resume(drm); DRM_DEBUG_DRIVER("mtk_drm_sys_resume\n"); - return 0; + + return ret; } #endif From 8272806d21bf2a7fd74602cc7bade7d12f73ac4b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 17 Jul 2018 10:35:18 +0200 Subject: [PATCH 050/242] drm/mediatek: Replace drm_dev_unref with drm_dev_put This patch unifies the naming of DRM functions for reference counting of struct drm_device. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Reviewed-by: Philipp Zabel Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index b68922a793cb..47ec604289b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -381,7 +381,7 @@ static int mtk_drm_bind(struct device *dev) err_deinit: mtk_drm_kms_deinit(drm); err_free: - drm_dev_unref(drm); + drm_dev_put(drm); return ret; } @@ -390,7 +390,7 @@ static void mtk_drm_unbind(struct device *dev) struct mtk_drm_private *private = dev_get_drvdata(dev); drm_dev_unregister(private->drm); - drm_dev_unref(private->drm); + drm_dev_put(private->drm); private->drm = NULL; } @@ -564,7 +564,7 @@ static int mtk_drm_remove(struct platform_device *pdev) drm_dev_unregister(drm); mtk_drm_kms_deinit(drm); - drm_dev_unref(drm); + drm_dev_put(drm); component_master_del(&pdev->dev, &mtk_drm_ops); pm_runtime_disable(&pdev->dev); From 29d32e466e98e03378878e95339334971d0fdaf4 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:36 +0800 Subject: [PATCH 051/242] drm/mediatek: add connection from RDMA0 to DPI1 This patch add connection from RDMA0 to DPI1 Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 87e4191c250e..03e3628b5b0d 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -106,6 +106,7 @@ #define OVL1_MOUT_EN_COLOR1 0x1 #define GAMMA_MOUT_EN_RDMA1 0x1 #define RDMA0_SOUT_DPI0 0x2 +#define RDMA0_SOUT_DPI1 0x3 #define RDMA0_SOUT_DSI2 0x4 #define RDMA0_SOUT_DSI3 0x5 #define RDMA1_SOUT_DPI0 0x2 @@ -224,6 +225,9 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI0) { *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; value = RDMA0_SOUT_DPI0; + } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI1) { + *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; + value = RDMA0_SOUT_DPI1; } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI2) { *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; value = RDMA0_SOUT_DSI2; From 48d25d243bfb20a7230e9f226c560b71d989d962 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:37 +0800 Subject: [PATCH 052/242] drm/mediatek: add connection from RDMA0 to DSI1 This patch add connection from RDMA0 to DSI1 Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 03e3628b5b0d..310d8482d5a0 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -107,6 +107,7 @@ #define GAMMA_MOUT_EN_RDMA1 0x1 #define RDMA0_SOUT_DPI0 0x2 #define RDMA0_SOUT_DPI1 0x3 +#define RDMA0_SOUT_DSI1 0x1 #define RDMA0_SOUT_DSI2 0x4 #define RDMA0_SOUT_DSI3 0x5 #define RDMA1_SOUT_DPI0 0x2 @@ -228,6 +229,9 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DPI1) { *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; value = RDMA0_SOUT_DPI1; + } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI1) { + *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; + value = RDMA0_SOUT_DSI1; } else if (cur == DDP_COMPONENT_RDMA0 && next == DDP_COMPONENT_DSI2) { *addr = DISP_REG_CONFIG_DISP_RDMA0_SOUT_EN; value = RDMA0_SOUT_DSI2; From 0a14785ee32ad40458657edaf1025f71ebbc1147 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:38 +0800 Subject: [PATCH 053/242] drm/mediatek: add connection from RDMA1 to DSI0 This patch add connection from RDMA1 to DSI0 Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 310d8482d5a0..31189fad8d4e 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -124,6 +124,7 @@ #define DPI0_SEL_IN_RDMA2 0x3 #define DPI1_SEL_IN_RDMA1 (0x1 << 8) #define DPI1_SEL_IN_RDMA2 (0x3 << 8) +#define DSI0_SEL_IN_RDMA1 0x1 #define DSI1_SEL_IN_RDMA1 0x1 #define DSI1_SEL_IN_RDMA2 0x4 #define DSI2_SEL_IN_RDMA1 (0x1 << 16) @@ -290,6 +291,9 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DPI1) { *addr = DISP_REG_CONFIG_DPI_SEL_IN; value = DPI1_SEL_IN_RDMA1; + } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DSI0) { + *addr = DISP_REG_CONFIG_DSIE_SEL_IN; + value = DSI0_SEL_IN_RDMA1; } else if (cur == DDP_COMPONENT_RDMA1 && next == DDP_COMPONENT_DSI1) { *addr = DISP_REG_CONFIG_DSIO_SEL_IN; value = DSI1_SEL_IN_RDMA1; From 85186efc2a5975801cd4ba03a9143305e474b645 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:39 +0800 Subject: [PATCH 054/242] drm/mediatek: add connection from RDMA2 to DSI0 This patch add connection from RDMA2 to DSI0 Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 31189fad8d4e..3239f22785fd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -125,6 +125,7 @@ #define DPI1_SEL_IN_RDMA1 (0x1 << 8) #define DPI1_SEL_IN_RDMA2 (0x3 << 8) #define DSI0_SEL_IN_RDMA1 0x1 +#define DSI0_SEL_IN_RDMA2 0x4 #define DSI1_SEL_IN_RDMA1 0x1 #define DSI1_SEL_IN_RDMA2 0x4 #define DSI2_SEL_IN_RDMA1 (0x1 << 16) @@ -309,6 +310,9 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur, } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DPI1) { *addr = DISP_REG_CONFIG_DPI_SEL_IN; value = DPI1_SEL_IN_RDMA2; + } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI0) { + *addr = DISP_REG_CONFIG_DSIE_SEL_IN; + value = DSI0_SEL_IN_RDMA2; } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI1) { *addr = DISP_REG_CONFIG_DSIE_SEL_IN; value = DSI1_SEL_IN_RDMA2; From 182add0b1b9170a1f8f2a049fe2e298222cf405a Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:40 +0800 Subject: [PATCH 055/242] drm/mediatek: add memory mode and layer_config for RDMA This patch add memory mode for RDMA and layer_config for RDMA If use RDMA to read data from memory, it should set memory mode to RDMA Layer config set the data address and pitch to RDMA from plane setting. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_rdma.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 585943c81e1f..08866550740f 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -31,14 +31,20 @@ #define RDMA_REG_UPDATE_INT BIT(0) #define DISP_REG_RDMA_GLOBAL_CON 0x0010 #define RDMA_ENGINE_EN BIT(0) +#define RDMA_MODE_MEMORY BIT(1) #define DISP_REG_RDMA_SIZE_CON_0 0x0014 #define DISP_REG_RDMA_SIZE_CON_1 0x0018 #define DISP_REG_RDMA_TARGET_LINE 0x001c +#define DISP_RDMA_MEM_SRC_PITCH 0x002c +#define DISP_RDMA_MEM_GMC_SETTING_0 0x0030 #define DISP_REG_RDMA_FIFO_CON 0x0040 #define RDMA_FIFO_UNDERFLOW_EN BIT(31) #define RDMA_FIFO_PSEUDO_SIZE(bytes) (((bytes) / 16) << 16) #define RDMA_OUTPUT_VALID_FIFO_THRESHOLD(bytes) ((bytes) / 16) #define RDMA_FIFO_SIZE(rdma) ((rdma)->data->fifo_size) +#define DISP_RDMA_MEM_START_ADDR 0x0f00 + +#define RDMA_MEM_GMC 0x40402020 struct mtk_disp_rdma_data { unsigned int fifo_size; @@ -138,12 +144,27 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width, writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON); } +static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, + struct mtk_plane_state *state) +{ + struct mtk_plane_pending_state *pending = &state->pending; + unsigned int addr = pending->addr; + unsigned int pitch = pending->pitch & 0xffff; + + writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR); + writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH); + writel(RDMA_MEM_GMC, comp->regs + DISP_RDMA_MEM_GMC_SETTING_0); + rdma_update_bits(comp, DISP_REG_RDMA_GLOBAL_CON, + RDMA_MODE_MEMORY, RDMA_MODE_MEMORY); +} + static const struct mtk_ddp_comp_funcs mtk_disp_rdma_funcs = { .config = mtk_rdma_config, .start = mtk_rdma_start, .stop = mtk_rdma_stop, .enable_vblank = mtk_rdma_enable_vblank, .disable_vblank = mtk_rdma_disable_vblank, + .layer_config = mtk_rdma_layer_config, }; static int mtk_disp_rdma_bind(struct device *dev, struct device *master, From b428391ed6bd5e3cb8ea9d1738ef4bd16af6cdb2 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:41 +0800 Subject: [PATCH 056/242] drm/mediatek: add RGB color format support for RDMA This patch add RGB color format support for RDMA, including RGB565, RGB888, RGBA8888 and ARGB8888. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_rdma.c | 45 ++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 08866550740f..091e48e51501 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -35,6 +35,12 @@ #define DISP_REG_RDMA_SIZE_CON_0 0x0014 #define DISP_REG_RDMA_SIZE_CON_1 0x0018 #define DISP_REG_RDMA_TARGET_LINE 0x001c +#define DISP_RDMA_MEM_CON 0x0024 +#define MEM_MODE_INPUT_FORMAT_RGB565 (0x000 << 4) +#define MEM_MODE_INPUT_FORMAT_RGB888 (0x001 << 4) +#define MEM_MODE_INPUT_FORMAT_RGBA8888 (0x002 << 4) +#define MEM_MODE_INPUT_FORMAT_ARGB8888 (0x003 << 4) +#define MEM_MODE_INPUT_SWAP BIT(8) #define DISP_RDMA_MEM_SRC_PITCH 0x002c #define DISP_RDMA_MEM_GMC_SETTING_0 0x0030 #define DISP_REG_RDMA_FIFO_CON 0x0040 @@ -144,12 +150,51 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width, writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON); } +static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma, + unsigned int fmt) +{ + /* The return value in switch "MEM_MODE_INPUT_FORMAT_XXX" + * is defined in mediatek HW data sheet. + * The alphabet order in XXX is no relation to data + * arrangement in memory. + */ + switch (fmt) { + default: + case DRM_FORMAT_RGB565: + return MEM_MODE_INPUT_FORMAT_RGB565; + case DRM_FORMAT_BGR565: + return MEM_MODE_INPUT_FORMAT_RGB565 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_RGB888: + return MEM_MODE_INPUT_FORMAT_RGB888; + case DRM_FORMAT_BGR888: + return MEM_MODE_INPUT_FORMAT_RGB888 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_RGBA8888: + return MEM_MODE_INPUT_FORMAT_ARGB8888; + case DRM_FORMAT_BGRX8888: + case DRM_FORMAT_BGRA8888: + return MEM_MODE_INPUT_FORMAT_ARGB8888 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + return MEM_MODE_INPUT_FORMAT_RGBA8888; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + return MEM_MODE_INPUT_FORMAT_RGBA8888 | MEM_MODE_INPUT_SWAP; + } +} + static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state) { + struct mtk_disp_rdma *rdma = comp_to_rdma(comp); struct mtk_plane_pending_state *pending = &state->pending; unsigned int addr = pending->addr; unsigned int pitch = pending->pitch & 0xffff; + unsigned int fmt = pending->format; + unsigned int con; + + con = rdma_fmt_convert(rdma, fmt); + writel_relaxed(con, comp->regs + DISP_RDMA_MEM_CON); writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR); writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH); From 55b53f6f7ccf0990ad83acf4fdb0436ff79fdfb6 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:42 +0800 Subject: [PATCH 057/242] drm/mediatek: add the comment about color format setting for OVL This patch add the comment about color format setting for OVL Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 978782a77629..0facd823c552 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -157,6 +157,11 @@ static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx) static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt) { + /* The return value in switch "MEM_MODE_INPUT_FORMAT_XXX" + * is defined in mediatek HW data sheet. + * The alphabet order in XXX is no relation to data + * arrangement in memory. + */ switch (fmt) { default: case DRM_FORMAT_RGB565: From 94420a63cf784945061b7b5f38511b7a48f034eb Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:43 +0800 Subject: [PATCH 058/242] drm/mediatek: add YUYV/UYVY color format support for RDMA This patch add YUYV/UYVY color format support for RDMA and transform matrix for YUYV/UYVY. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_rdma.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 091e48e51501..2d27e15445d2 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -33,6 +33,9 @@ #define RDMA_ENGINE_EN BIT(0) #define RDMA_MODE_MEMORY BIT(1) #define DISP_REG_RDMA_SIZE_CON_0 0x0014 +#define RDMA_MATRIX_ENABLE BIT(17) +#define RDMA_MATRIX_INT_MTX_SEL GENMASK(23, 20) +#define RDMA_MATRIX_INT_MTX_BT601_to_RGB (6 << 20) #define DISP_REG_RDMA_SIZE_CON_1 0x0018 #define DISP_REG_RDMA_TARGET_LINE 0x001c #define DISP_RDMA_MEM_CON 0x0024 @@ -40,6 +43,8 @@ #define MEM_MODE_INPUT_FORMAT_RGB888 (0x001 << 4) #define MEM_MODE_INPUT_FORMAT_RGBA8888 (0x002 << 4) #define MEM_MODE_INPUT_FORMAT_ARGB8888 (0x003 << 4) +#define MEM_MODE_INPUT_FORMAT_UYVY (0x004 << 4) +#define MEM_MODE_INPUT_FORMAT_YUYV (0x005 << 4) #define MEM_MODE_INPUT_SWAP BIT(8) #define DISP_RDMA_MEM_SRC_PITCH 0x002c #define DISP_RDMA_MEM_GMC_SETTING_0 0x0030 @@ -180,6 +185,10 @@ static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma, case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: return MEM_MODE_INPUT_FORMAT_RGBA8888 | MEM_MODE_INPUT_SWAP; + case DRM_FORMAT_UYVY: + return MEM_MODE_INPUT_FORMAT_UYVY; + case DRM_FORMAT_YUYV: + return MEM_MODE_INPUT_FORMAT_YUYV; } } @@ -196,6 +205,17 @@ static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, con = rdma_fmt_convert(rdma, fmt); writel_relaxed(con, comp->regs + DISP_RDMA_MEM_CON); + if (fmt == DRM_FORMAT_UYVY || fmt == DRM_FORMAT_YUYV) { + rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_ENABLE, RDMA_MATRIX_ENABLE); + rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_INT_MTX_SEL, + RDMA_MATRIX_INT_MTX_BT601_to_RGB); + } else { + rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, + RDMA_MATRIX_ENABLE, 0); + } + writel_relaxed(addr, comp->regs + DISP_RDMA_MEM_START_ADDR); writel_relaxed(pitch, comp->regs + DISP_RDMA_MEM_SRC_PITCH); writel(RDMA_MEM_GMC, comp->regs + DISP_RDMA_MEM_GMC_SETTING_0); From 650afd49572b56a5c58134d4acfeb77acc69d622 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:44 +0800 Subject: [PATCH 059/242] drm/mediatek: add function to get layer number for component This patch add function to get layer number for component Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 7413ffeb3c9d..8399229e6ad2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -78,6 +78,7 @@ struct mtk_ddp_comp_funcs { void (*stop)(struct mtk_ddp_comp *comp); void (*enable_vblank)(struct mtk_ddp_comp *comp, struct drm_crtc *crtc); void (*disable_vblank)(struct mtk_ddp_comp *comp); + unsigned int (*layer_nr)(struct mtk_ddp_comp *comp); void (*layer_on)(struct mtk_ddp_comp *comp, unsigned int idx); void (*layer_off)(struct mtk_ddp_comp *comp, unsigned int idx); void (*layer_config)(struct mtk_ddp_comp *comp, unsigned int idx, @@ -128,6 +129,14 @@ static inline void mtk_ddp_comp_disable_vblank(struct mtk_ddp_comp *comp) comp->funcs->disable_vblank(comp); } +static inline unsigned int mtk_ddp_comp_layer_nr(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->layer_nr) + return comp->funcs->layer_nr(comp); + + return 0; +} + static inline void mtk_ddp_comp_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { From 1cbcb763ea5035e7ef01010ea68eb3b5143ad7cb Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:45 +0800 Subject: [PATCH 060/242] drm/mediatek: add function to return OVL layer number This patch add function to return OVL layer number For now, MT8173, MT2712, MT2701 OVL all has 4 layer. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 0facd823c552..28d191192945 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -132,6 +132,11 @@ static void mtk_ovl_config(struct mtk_ddp_comp *comp, unsigned int w, writel(0x0, comp->regs + DISP_REG_OVL_RST); } +static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp) +{ + return 4; +} + static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { unsigned int reg; @@ -226,6 +231,7 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .stop = mtk_ovl_stop, .enable_vblank = mtk_ovl_enable_vblank, .disable_vblank = mtk_ovl_disable_vblank, + .layer_nr = mtk_ovl_layer_nr, .layer_on = mtk_ovl_layer_on, .layer_off = mtk_ovl_layer_off, .layer_config = mtk_ovl_layer_config, From 98b6d76f957ba80017a3118fe0e33030b4bc017b Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:46 +0800 Subject: [PATCH 061/242] drm/mediatek: add function to return RDMA layer number This patch add function to return RDMA layer number RDMA always has one layer. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_rdma.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 2d27e15445d2..b0a5cffe345a 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -192,6 +192,11 @@ static unsigned int rdma_fmt_convert(struct mtk_disp_rdma *rdma, } } +static unsigned int mtk_rdma_layer_nr(struct mtk_ddp_comp *comp) +{ + return 1; +} + static void mtk_rdma_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, struct mtk_plane_state *state) { @@ -229,6 +234,7 @@ static const struct mtk_ddp_comp_funcs mtk_disp_rdma_funcs = { .stop = mtk_rdma_stop, .enable_vblank = mtk_rdma_enable_vblank, .disable_vblank = mtk_rdma_disable_vblank, + .layer_nr = mtk_rdma_layer_nr, .layer_config = mtk_rdma_layer_config, }; From 66b2cf9623facfad790b335fcfd717258a00896b Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:47 +0800 Subject: [PATCH 062/242] drm/mediatek: use layer_nr function to get layer number to init plane This patch use layer_nr function to get layer number to init plane When plane init in crtc create, it use the number of OVL layer to init plane. That's OVL can read 4 memory address. For mt2712 third ddp, it use RDMA to read memory. RDMA can read 1 memory address, so it just init one plane. For compatibility, this patch use mtk_ddp_comp_layer_nr function to get layer number from their HW component in ddp for plane init. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 21 ++++++++++++++------- drivers/gpu/drm/mediatek/mtk_drm_crtc.h | 1 - 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 2d6aa150a9ff..845d1608465e 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -45,7 +45,8 @@ struct mtk_drm_crtc { bool pending_needs_vblank; struct drm_pending_vblank_event *event; - struct drm_plane planes[OVL_LAYER_NR]; + struct drm_plane *planes; + unsigned int layer_nr; bool pending_planes; void __iomem *config_regs; @@ -286,7 +287,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) } /* Initially configure all planes */ - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -351,7 +352,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) } if (mtk_crtc->pending_planes) { - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -403,7 +404,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, return; /* Set all pending plane state to disabled */ - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -450,7 +451,7 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc, if (mtk_crtc->event) mtk_crtc->pending_needs_vblank = true; - for (i = 0; i < OVL_LAYER_NR; i++) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; @@ -598,7 +599,12 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mtk_crtc->ddp_comp[i] = comp; } - for (zpos = 0; zpos < OVL_LAYER_NR; zpos++) { + mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); + mtk_crtc->planes = devm_kzalloc(dev, mtk_crtc->layer_nr * + sizeof(struct drm_plane), + GFP_KERNEL); + + for (zpos = 0; zpos < mtk_crtc->layer_nr; zpos++) { type = (zpos == 0) ? DRM_PLANE_TYPE_PRIMARY : (zpos == 1) ? DRM_PLANE_TYPE_CURSOR : DRM_PLANE_TYPE_OVERLAY; @@ -609,7 +615,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0], - &mtk_crtc->planes[1], pipe); + mtk_crtc->layer_nr > 1 ? &mtk_crtc->planes[1] : + NULL, pipe); if (ret < 0) goto unprepare; drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index 9d9410c67ae9..60bcc8aba8e3 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -18,7 +18,6 @@ #include "mtk_drm_ddp_comp.h" #include "mtk_drm_plane.h" -#define OVL_LAYER_NR 4 #define MTK_LUT_SIZE 512 #define MTK_MAX_BPC 10 #define MTK_MIN_BPC 3 From f265905c939e21a0c9e83540d4c2776c3e43c310 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:48 +0800 Subject: [PATCH 063/242] drm/mediatek: update some variable name from ovl to comp This patch update some variable name from ovl to comp Because RDMA would be first HW in ddp, the naming ovl should be change to comp. Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 26 ++++++++++++------------- drivers/gpu/drm/mediatek/mtk_drm_crtc.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 845d1608465e..0b976dfd04df 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -172,9 +172,9 @@ static void mtk_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - mtk_ddp_comp_enable_vblank(ovl, &mtk_crtc->base); + mtk_ddp_comp_enable_vblank(comp, &mtk_crtc->base); return 0; } @@ -182,9 +182,9 @@ static int mtk_drm_crtc_enable_vblank(struct drm_crtc *crtc) static void mtk_drm_crtc_disable_vblank(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - mtk_ddp_comp_disable_vblank(ovl); + mtk_ddp_comp_disable_vblank(comp); } static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc) @@ -335,7 +335,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; unsigned int i; /* @@ -344,7 +344,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) * queue update module registers on vblank. */ if (state->pending_config) { - mtk_ddp_comp_config(ovl, state->pending_width, + mtk_ddp_comp_config(comp, state->pending_width, state->pending_height, state->pending_vrefresh, 0); @@ -359,7 +359,7 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) plane_state = to_mtk_plane_state(plane->state); if (plane_state->pending.config) { - mtk_ddp_comp_layer_config(ovl, i, plane_state); + mtk_ddp_comp_layer_config(comp, i, plane_state); plane_state->pending.config = false; } } @@ -371,12 +371,12 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; int ret; DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); - ret = mtk_smi_larb_get(ovl->larb_dev); + ret = mtk_smi_larb_get(comp->larb_dev); if (ret) { DRM_ERROR("Failed to get larb: %d\n", ret); return; @@ -384,7 +384,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, ret = mtk_crtc_ddp_hw_init(mtk_crtc); if (ret) { - mtk_smi_larb_put(ovl->larb_dev); + mtk_smi_larb_put(comp->larb_dev); return; } @@ -396,7 +396,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0]; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; int i; DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); @@ -419,7 +419,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); mtk_crtc_ddp_hw_fini(mtk_crtc); - mtk_smi_larb_put(ovl->larb_dev); + mtk_smi_larb_put(comp->larb_dev); mtk_crtc->enabled = false; } @@ -517,7 +517,7 @@ err_cleanup_crtc: return ret; } -void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl) +void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_drm_private *priv = crtc->dev->dev_private; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index 60bcc8aba8e3..091adb2087eb 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -23,7 +23,7 @@ #define MTK_MIN_BPC 3 void mtk_drm_crtc_commit(struct drm_crtc *crtc); -void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl); +void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *comp); int mtk_drm_crtc_create(struct drm_device *drm_dev, const enum mtk_ddp_comp_id *path, unsigned int path_len); From 08bcbed747eb87f00d2e2590b49607af1a9f4fe9 Mon Sep 17 00:00:00 2001 From: Stu Hsieh Date: Thu, 9 Aug 2018 10:15:49 +0800 Subject: [PATCH 064/242] drm/mediatek: fix connection from RDMA2 to DSI1 This patch fix connection from RDMA2 to DSI1 Signed-off-by: Stu Hsieh Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 3239f22785fd..546b3e3b300b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -314,7 +314,7 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur, *addr = DISP_REG_CONFIG_DSIE_SEL_IN; value = DSI0_SEL_IN_RDMA2; } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI1) { - *addr = DISP_REG_CONFIG_DSIE_SEL_IN; + *addr = DISP_REG_CONFIG_DSIO_SEL_IN; value = DSI1_SEL_IN_RDMA2; } else if (cur == DDP_COMPONENT_RDMA2 && next == DDP_COMPONENT_DSI2) { *addr = DISP_REG_CONFIG_DSIE_SEL_IN; From 538d6e9d597584e80514698e24321645debde78f Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 24 Jul 2018 19:14:19 +0300 Subject: [PATCH 065/242] Revert "ARM: dts: imx7d: Invert legacy PCI irq mapping" This reverts commit 1c86c9dd82f859b474474a7fee0d5195da2c9c1d. That commit followed the reference manual but unfortunately the imx7d manual is incorrect. Tested with ath9k pcie card and confirmed internally. Signed-off-by: Leonard Crestez Acked-by: Lucas Stach Fixes: 1c86c9dd82f8 ("ARM: dts: imx7d: Invert legacy PCI irq mapping") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d.dtsi | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 7cbc2ffa4b3a..7234e8330a57 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -126,10 +126,14 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>; + /* + * Reference manual lists pci irqs incorrectly + * Real hardware ordering is same as imx6: D+MSI, C, B, A + */ + interrupt-map = <0 0 0 1 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_PCIE_CTRL_ROOT_CLK>, <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>, <&clks IMX7D_PCIE_PHY_ROOT_CLK>; From 90a96087b5fa835790a54c588184adfc867dbc12 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 20 Jul 2018 17:39:16 -0300 Subject: [PATCH 066/242] ARM: dts: imx28-evk: Move regulators outside simple-bus It is recommended to place regulators outside simple-bus, so move them accordingly. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx28-evk.dts | 135 +++++++++++++++----------------- 1 file changed, 61 insertions(+), 74 deletions(-) diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts index 6b0ae667640f..210aee097b36 100644 --- a/arch/arm/boot/dts/imx28-evk.dts +++ b/arch/arm/boot/dts/imx28-evk.dts @@ -13,6 +13,67 @@ reg = <0x40000000 0x08000000>; }; + + reg_3p3v: regulator-3p3v { + compatible = "regulator-fixed"; + regulator-name = "3P3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + reg_vddio_sd0: regulator-vddio-sd0 { + compatible = "regulator-fixed"; + regulator-name = "vddio-sd0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio3 28 0>; + }; + + reg_fec_3v3: regulator-fec-3v3 { + compatible = "regulator-fixed"; + regulator-name = "fec-3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio2 15 0>; + }; + + reg_usb0_vbus: regulator-usb0-vbus { + compatible = "regulator-fixed"; + regulator-name = "usb0_vbus"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio3 9 0>; + enable-active-high; + }; + + reg_usb1_vbus: regulator-usb1-vbus { + compatible = "regulator-fixed"; + regulator-name = "usb1_vbus"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio3 8 0>; + enable-active-high; + }; + + reg_lcd_3v3: regulator-lcd-3v3 { + compatible = "regulator-fixed"; + regulator-name = "lcd-3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio3 30 0>; + enable-active-high; + }; + + reg_can_3v3: regulator-can-3v3 { + compatible = "regulator-fixed"; + regulator-name = "can-3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio2 13 0>; + enable-active-high; + }; + apb@80000000 { apbh@80000000 { gpmi-nand@8000c000 { @@ -269,80 +330,6 @@ }; }; - regulators { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <0>; - - reg_3p3v: regulator@0 { - compatible = "regulator-fixed"; - reg = <0>; - regulator-name = "3P3V"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-always-on; - }; - - reg_vddio_sd0: regulator@1 { - compatible = "regulator-fixed"; - reg = <1>; - regulator-name = "vddio-sd0"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio3 28 0>; - }; - - reg_fec_3v3: regulator@2 { - compatible = "regulator-fixed"; - reg = <2>; - regulator-name = "fec-3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio2 15 0>; - }; - - reg_usb0_vbus: regulator@3 { - compatible = "regulator-fixed"; - reg = <3>; - regulator-name = "usb0_vbus"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - gpio = <&gpio3 9 0>; - enable-active-high; - }; - - reg_usb1_vbus: regulator@4 { - compatible = "regulator-fixed"; - reg = <4>; - regulator-name = "usb1_vbus"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - gpio = <&gpio3 8 0>; - enable-active-high; - }; - - reg_lcd_3v3: regulator@5 { - compatible = "regulator-fixed"; - reg = <5>; - regulator-name = "lcd-3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio3 30 0>; - enable-active-high; - }; - - reg_can_3v3: regulator@6 { - compatible = "regulator-fixed"; - reg = <6>; - regulator-name = "can-3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio2 13 0>; - enable-active-high; - }; - - }; - sound { compatible = "fsl,imx28-evk-sgtl5000", "fsl,mxs-audio-sgtl5000"; From c1539840fc25d89f3fe51038144dc0233b333453 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 20 Jul 2018 17:39:17 -0300 Subject: [PATCH 067/242] ARM: dts: imx28-evk: Convert to the new display bindings imx28-evk board has a Seiko 43WVF1G parallel display. Instead of hardcoding the display timings in the device tree, use the "sii,43wvf1g" compatible instead. This aligns with the new mxsfb bindings scheme documented at: Documentation/devicetree/bindings/display/mxsfb.txt Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx28-evk.dts | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts index 210aee097b36..93ab5bdfe068 100644 --- a/arch/arm/boot/dts/imx28-evk.dts +++ b/arch/arm/boot/dts/imx28-evk.dts @@ -74,6 +74,26 @@ enable-active-high; }; + reg_lcd_5v: regulator-lcd-5v { + compatible = "regulator-fixed"; + regulator-name = "lcd-5v"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + panel { + compatible = "sii,43wvf1g"; + backlight = <&backlight_display>; + dvdd-supply = <®_lcd_3v3>; + avdd-supply = <®_lcd_5v>; + + port { + panel_in: endpoint { + remote-endpoint = <&display_out>; + }; + }; + }; + apb@80000000 { apbh@80000000 { gpmi-nand@8000c000 { @@ -177,31 +197,11 @@ pinctrl-names = "default"; pinctrl-0 = <&lcdif_24bit_pins_a &lcdif_pins_evk>; - lcd-supply = <®_lcd_3v3>; - display = <&display0>; status = "okay"; - display0: display0 { - bits-per-pixel = <32>; - bus-width = <24>; - - display-timings { - native-mode = <&timing0>; - timing0: timing0 { - clock-frequency = <33500000>; - hactive = <800>; - vactive = <480>; - hback-porch = <89>; - hfront-porch = <164>; - vback-porch = <23>; - vfront-porch = <10>; - hsync-len = <10>; - vsync-len = <10>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <0>; - }; + port { + display_out: endpoint { + remote-endpoint = <&panel_in>; }; }; }; @@ -350,7 +350,7 @@ }; }; - backlight { + backlight_display: backlight { compatible = "pwm-backlight"; pwms = <&pwm 2 5000000>; brightness-levels = <0 4 8 16 32 64 128 255>; From cde305e9ce28d96d7f63e4fb5298291e90a91f8f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 20 Jul 2018 17:39:18 -0300 Subject: [PATCH 068/242] ARM: dts: imx23-evk: Move regulators outside simple-bus It is recommended to place regulators outside simple-bus, so move them accordingly. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx23-evk.dts | 42 +++++++++++++-------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 9fb47724b9c1..494095d40327 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -13,6 +13,23 @@ reg = <0x40000000 0x08000000>; }; + reg_vddio_sd0: regulator-vddio-sd0 { + compatible = "regulator-fixed"; + regulator-name = "vddio-sd0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio1 29 0>; + }; + + reg_lcd_3v3: regulator-lcd-3v3 { + compatible = "regulator-fixed"; + regulator-name = "lcd-3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio1 18 0>; + enable-active-high; + }; + apb@80000000 { apbh@80000000 { gpmi-nand@8000c000 { @@ -118,31 +135,6 @@ }; }; - regulators { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <0>; - - reg_vddio_sd0: regulator@0 { - compatible = "regulator-fixed"; - reg = <0>; - regulator-name = "vddio-sd0"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio1 29 0>; - }; - - reg_lcd_3v3: regulator@1 { - compatible = "regulator-fixed"; - reg = <1>; - regulator-name = "lcd-3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - gpio = <&gpio1 18 0>; - enable-active-high; - }; - }; - backlight { compatible = "pwm-backlight"; pwms = <&pwm 2 5000000>; From 549644b8c3c125355a361def1e42f8319ac6ad6c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 20 Jul 2018 17:39:19 -0300 Subject: [PATCH 069/242] ARM: dts: imx23-evk: Convert to the new display bindings imx23-evk board has a Seiko 43WVF1G parallel display. Instead of hardcoding the display timings in the device tree, use the "sii,43wvf1g" compatible instead. This aligns with the new mxsfb bindings scheme documented at: Documentation/devicetree/bindings/display/mxsfb.txt Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx23-evk.dts | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 494095d40327..ad2ae25b7b4d 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -30,6 +30,26 @@ enable-active-high; }; + reg_lcd_5v: regulator-lcd-5v { + compatible = "regulator-fixed"; + regulator-name = "lcd-5v"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + panel { + compatible = "sii,43wvf1g"; + backlight = <&backlight_display>; + dvdd-supply = <®_lcd_3v3>; + avdd-supply = <®_lcd_5v>; + + port { + panel_in: endpoint { + remote-endpoint = <&display_out>; + }; + }; + }; + apb@80000000 { apbh@80000000 { gpmi-nand@8000c000 { @@ -69,31 +89,11 @@ lcdif@80030000 { pinctrl-names = "default"; pinctrl-0 = <&lcdif_24bit_pins_a>; - lcd-supply = <®_lcd_3v3>; - display = <&display0>; status = "okay"; - display0: display0 { - bits-per-pixel = <32>; - bus-width = <24>; - - display-timings { - native-mode = <&timing0>; - timing0: timing0 { - clock-frequency = <9200000>; - hactive = <480>; - vactive = <272>; - hback-porch = <15>; - hfront-porch = <8>; - vback-porch = <12>; - vfront-porch = <4>; - hsync-len = <1>; - vsync-len = <1>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <0>; - }; + port { + display_out: endpoint { + remote-endpoint = <&panel_in>; }; }; }; @@ -135,7 +135,7 @@ }; }; - backlight { + backlight_display: backlight { compatible = "pwm-backlight"; pwms = <&pwm 2 5000000>; brightness-levels = <0 4 8 16 32 64 128 255>; From 0ffbc2824282793a3ebf04d4804dea15d5b211c6 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 20 Jul 2018 17:39:20 -0300 Subject: [PATCH 070/242] ARM: mxs_defconfig: Select CONFIG_DRM_PANEL_SEIKO_43WVF1G imx23-evk and imx28-evk boards use a Seiko 43WVF1G panel. Now that the DRM mxsfb driver is the one selected by default, let's also select CONFIG_DRM_PANEL_SEIKO_43WVF1G so that these boards continue to have a working display by default. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/mxs_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index 148226e36152..7b8212857535 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -95,6 +95,7 @@ CONFIG_MFD_MXS_LRADC=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_DRM=y +CONFIG_DRM_PANEL_SEIKO_43WVF1G=y CONFIG_DRM_MXSFB=y CONFIG_FB_MODE_HELPERS=y CONFIG_BACKLIGHT_LCD_SUPPORT=y From b9543a2e39dc909e6b7ec901b6c7208d01d5c0dd Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 20 Jul 2018 17:39:21 -0300 Subject: [PATCH 071/242] ARM: imx_v6_v7_defconfig: Select CONFIG_DRM_PANEL_SEIKO_43WVF1G imx6sl-evk, imx6sll-evk and imx6sx-sdb boards use a Seiko 43WVF1G panel. Now that the DRM mxsfb driver is the one selected by default, let's also select CONFIG_DRM_PANEL_SEIKO_43WVF1G so that these boards continue to have a working display by default. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v6_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index e2c127608bcc..7eca43ff69bb 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -257,6 +257,7 @@ CONFIG_IMX_IPUV3_CORE=y CONFIG_DRM=y CONFIG_DRM_PANEL_LVDS=y CONFIG_DRM_PANEL_SIMPLE=y +CONFIG_DRM_PANEL_SEIKO_43WVF1G=y CONFIG_DRM_DW_HDMI_AHB_AUDIO=m CONFIG_DRM_DW_HDMI_CEC=y CONFIG_DRM_IMX=y From 166cd4421b0dabd2e438f5384f4ecf930dc8ab08 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 23 Aug 2018 23:43:45 +0200 Subject: [PATCH 072/242] mtd: rawnand: docg4: Remove wrong __init annotations If gcc (e.g. 4.1.2) decides not to inline init_mtd_structs() and read_id_reg(), this will cause section mismatches, and crashes: WARNING: drivers/mtd/nand/raw/docg4.o(.text+0xc10): Section mismatch in reference from the function docg4_attach_chip() to the function .init.text:init_mtd_structs() The function docg4_attach_chip() references the function __init init_mtd_structs(). This is often because docg4_attach_chip lacks a __init annotation or the annotation of init_mtd_structs is wrong. WARNING: drivers/mtd/nand/raw/docg4.o(.text+0xc3e): Section mismatch in reference from the function docg4_attach_chip() to the function .init.text:read_id_reg() The function docg4_attach_chip() references the function __init read_id_reg(). This is often because docg4_attach_chip lacks a __init annotation or the annotation of read_id_reg is wrong. Fix this by dropping the now incorrect __init annotations from init_mtd_structs() and read_id_reg(). Fixes: 66a38478dcc5b5a3 ("mtd: rawnand: docg4: convert driver to nand_scan()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/docg4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c index a3f04315c05c..427fcbc1b71c 100644 --- a/drivers/mtd/nand/raw/docg4.c +++ b/drivers/mtd/nand/raw/docg4.c @@ -1218,7 +1218,7 @@ static int docg4_resume(struct platform_device *pdev) return 0; } -static void __init init_mtd_structs(struct mtd_info *mtd) +static void init_mtd_structs(struct mtd_info *mtd) { /* initialize mtd and nand data structures */ @@ -1290,7 +1290,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd) } -static int __init read_id_reg(struct mtd_info *mtd) +static int read_id_reg(struct mtd_info *mtd) { struct nand_chip *nand = mtd_to_nand(mtd); struct docg4_priv *doc = nand_get_controller_data(nand); From 1ab534e85c93945f7862378d8c8adcf408205b19 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 24 Aug 2018 10:03:51 -0700 Subject: [PATCH 073/242] x86/spectre: Add missing family 6 check to microcode check The check for Spectre microcodes does not check for family 6, only the model numbers. Add a family 6 check to avoid ambiguity with other families. Fixes: a5b296636453 ("x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180824170351.34874-2-andi@firstfloor.org --- arch/x86/kernel/cpu/intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 401e8c133108..fc3c07fe7df5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; + if (c->x86 != 6) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_stepping == spectre_bad_microcodes[i].stepping) From cc51e5428ea54f575d49cfcede1d4cb3a72b4ec4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 24 Aug 2018 10:03:50 -0700 Subject: [PATCH 074/242] x86/speculation/l1tf: Increase l1tf memory limit for Nehalem+ On Nehalem and newer core CPUs the CPU cache internally uses 44 bits physical address space. The L1TF workaround is limited by this internal cache address width, and needs to have one bit free there for the mitigation to work. Older client systems report only 36bit physical address space so the range check decides that L1TF is not mitigated for a 36bit phys/32GB system with some memory holes. But since these actually have the larger internal cache width this warning is bogus because it would only really be needed if the system had more than 43bits of memory. Add a new internal x86_cache_bits field. Normally it is the same as the physical bits field reported by CPUID, but for Nehalem and newerforce it to be at least 44bits. Change the L1TF memory size warning to use the new cache_bits field to avoid bogus warnings and remove the bogus comment about memory size. Fixes: 17dbca119312 ("x86/speculation/l1tf: Add sysfs reporting for l1tf") Reported-by: George Anchev Reported-by: Christopher Snowhill Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: Michael Hocko Cc: vbabka@suse.cz Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180824170351.34874-1-andi@firstfloor.org --- arch/x86/include/asm/processor.h | 4 ++- arch/x86/kernel/cpu/bugs.c | 46 ++++++++++++++++++++++++++++---- arch/x86/kernel/cpu/common.c | 1 + 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c24297268ebc..d53c54b842da 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,6 +132,8 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; + /* Address space bits used by the cache internally */ + u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; @@ -183,7 +185,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { - return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT); + return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4c2313d0b9ca..40bdaea97fe7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -668,6 +668,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation); enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation); +/* + * These CPUs all support 44bits physical address space internally in the + * cache but CPUID can report a smaller number of physical address bits. + * + * The L1TF mitigation uses the top most address bit for the inversion of + * non present PTEs. When the installed memory reaches into the top most + * address bit due to memory holes, which has been observed on machines + * which report 36bits physical address bits and have 32G RAM installed, + * then the mitigation range check in l1tf_select_mitigation() triggers. + * This is a false positive because the mitigation is still possible due to + * the fact that the cache uses 44bit internally. Use the cache bits + * instead of the reported physical bits and adjust them on the affected + * machines to 44bit if the reported bits are less than 44. + */ +static void override_cache_bits(struct cpuinfo_x86 *c) +{ + if (c->x86 != 6) + return; + + switch (c->x86_model) { + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + if (c->x86_cache_bits < 44) + c->x86_cache_bits = 44; + break; + } +} + static void __init l1tf_select_mitigation(void) { u64 half_pa; @@ -675,6 +714,8 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + override_cache_bits(&boot_cpu_data); + switch (l1tf_mitigation) { case L1TF_MITIGATION_OFF: case L1TF_MITIGATION_FLUSH_NOWARN: @@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void) return; #endif - /* - * This is extremely unlikely to happen because almost all - * systems have far more MAX_PA/2 than RAM can be fit into - * DIMM slots. - */ half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 84dee5ab745a..44c4ef3d989b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c) else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) c->x86_phys_bits = 36; #endif + c->x86_cache_bits = c->x86_phys_bits; } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) From e3a5dc08715abba646324fd8456282bd77798e9c Mon Sep 17 00:00:00 2001 From: Nikolas Nyby Date: Sat, 25 Aug 2018 19:10:54 -0400 Subject: [PATCH 075/242] x86/Kconfig: Fix trivial typo Fix a typo in the Kconfig help text: adverticed -> advertised. Signed-off-by: Nikolas Nyby Signed-off-by: Thomas Gleixner Cc: trivial@kernel.org Cc: tglx@linutronix.de Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20180825231054.23813-1-nikolas@gnu.org --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c5ff296bc5d1..1a0be022f91d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2843,7 +2843,7 @@ config X86_SYSFB This option, if enabled, marks VGA/VBE/EFI framebuffers as generic framebuffers so the new generic system-framebuffer drivers can be used on x86. If the framebuffer is not compatible with the generic - modes, it is adverticed as fallback platform framebuffer so legacy + modes, it is advertised as fallback platform framebuffer so legacy drivers like efifb, vesafb and uvesafb can pick it up. If this option is not selected, all system framebuffers are always marked as fallback platform framebuffers as usual. From f7c90c2aa4004808dff777ba6ae2c7294dd06851 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 21 Aug 2018 17:37:54 +0200 Subject: [PATCH 076/242] x86/xen: don't write ptes directly in 32-bit PV guests In some cases 32-bit PAE PV guests still write PTEs directly instead of using hypercalls. This is especially bad when clearing a PTE as this is done via 32-bit writes which will produce intermediate L1TF attackable PTEs. Change the code to use hypercalls instead. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Boris Ostrovsky --- arch/x86/xen/mmu_pv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 9e7012858420..9396b4d17064 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -434,14 +434,13 @@ static void xen_set_pud(pud_t *ptr, pud_t val) static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) { trace_xen_mmu_set_pte_atomic(ptep, pte); - set_64bit((u64 *)ptep, native_pte_val(pte)); + __xen_set_pte(ptep, pte); } static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { trace_xen_mmu_pte_clear(mm, addr, ptep); - if (!xen_batched_set_pte(ptep, native_make_pte(0))) - native_pte_clear(mm, addr, ptep); + __xen_set_pte(ptep, native_make_pte(0)); } static void xen_pmd_clear(pmd_t *pmdp) @@ -1569,7 +1568,7 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & pte_val_ma(pte)); #endif - native_set_pte(ptep, pte); + __xen_set_pte(ptep, pte); } /* Early in boot, while setting up the initial pagetable, assume From 973e5405f2f67ddbb2bf07b3ffc71908a37fea8e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:10 +0200 Subject: [PATCH 077/242] xen/blkback: don't keep persistent grants too long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persistent grants are allocated until a threshold per ring is being reached. Those grants won't be freed until the ring is being destroyed meaning there will be resources kept busy which might no longer be used. Instead of freeing only persistent grants until the threshold is reached add a timestamp and remove all persistent grants not having been in use for a minute. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- .../ABI/testing/sysfs-driver-xen-blkback | 10 +++ drivers/block/xen-blkback/blkback.c | 90 ++++++++++--------- drivers/block/xen-blkback/common.h | 8 +- 3 files changed, 61 insertions(+), 47 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback index 8bb43b66eb55..4e7babb3ba1f 100644 --- a/Documentation/ABI/testing/sysfs-driver-xen-blkback +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback @@ -15,3 +15,13 @@ Description: blkback. If the frontend tries to use more than max_persistent_grants, the LRU kicks in and starts removing 5% of max_persistent_grants every 100ms. + +What: /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds +Date: August 2018 +KernelVersion: 4.19 +Contact: Roger Pau Monné +Description: + How long a persistent grant is allowed to remain + allocated without being in use. The time is in + seconds, 0 means indefinitely long. + The default is 60 seconds. diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b55b245e8052..9eae7b243f68 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -83,6 +83,18 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); +/* + * How long a persistent grant is allowed to remain allocated without being in + * use. The time is in seconds, 0 means indefinitely long. + */ + +static unsigned int xen_blkif_pgrant_timeout = 60; +module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout, + uint, 0644); +MODULE_PARM_DESC(persistent_grant_unused_seconds, + "Time in seconds an unused persistent grant is allowed to " + "remain allocated. Default is 60, 0 means unlimited."); + /* * Maximum number of rings/queues blkback supports, allow as many queues as there * are CPUs if user has not specified a value. @@ -123,6 +135,13 @@ module_param(log_stats, int, 0644); /* Number of free pages to remove on each call to gnttab_free_pages */ #define NUM_BATCH_FREE_PAGES 10 +static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt) +{ + return xen_blkif_pgrant_timeout && + (jiffies - persistent_gnt->last_used >= + HZ * xen_blkif_pgrant_timeout); +} + static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) { unsigned long flags; @@ -278,7 +297,7 @@ static void put_persistent_gnt(struct xen_blkif_ring *ring, { if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) pr_alert_ratelimited("freeing a grant already unused\n"); - set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + persistent_gnt->last_used = jiffies; clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); atomic_dec(&ring->persistent_gnt_in_use); } @@ -371,26 +390,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) struct persistent_gnt *persistent_gnt; struct rb_node *n; unsigned int num_clean, total; - bool scan_used = false, clean_used = false; + bool scan_used = false; struct rb_root *root; - if (ring->persistent_gnt_c < xen_blkif_max_pgrants || - (ring->persistent_gnt_c == xen_blkif_max_pgrants && - !ring->blkif->vbd.overflow_max_grants)) { - goto out; - } - if (work_busy(&ring->persistent_purge_work)) { pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); goto out; } - num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; - num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; - num_clean = min(ring->persistent_gnt_c, num_clean); - if ((num_clean == 0) || - (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use)))) - goto out; + if (ring->persistent_gnt_c < xen_blkif_max_pgrants || + (ring->persistent_gnt_c == xen_blkif_max_pgrants && + !ring->blkif->vbd.overflow_max_grants)) { + num_clean = 0; + } else { + num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + + num_clean; + num_clean = min(ring->persistent_gnt_c, num_clean); + pr_debug("Going to purge at least %u persistent grants\n", + num_clean); + } /* * At this point, we can assure that there will be no calls @@ -401,9 +420,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) * number of grants. */ - total = num_clean; - - pr_debug("Going to purge %u persistent grants\n", num_clean); + total = 0; BUG_ON(!list_empty(&ring->persistent_purge_list)); root = &ring->persistent_gnts; @@ -412,47 +429,38 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); - if (clean_used) { - clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); - continue; - } - if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) continue; - if (!scan_used && - (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags))) + if (!scan_used && !persistent_gnt_timeout(persistent_gnt)) + continue; + if (scan_used && total >= num_clean) continue; rb_erase(&persistent_gnt->node, root); list_add(&persistent_gnt->remove_node, &ring->persistent_purge_list); - if (--num_clean == 0) - goto finished; + total++; } /* - * If we get here it means we also need to start cleaning + * Check whether we also need to start cleaning * grants that were used since last purge in order to cope * with the requested num */ - if (!scan_used && !clean_used) { - pr_debug("Still missing %u purged frames\n", num_clean); + if (!scan_used && total < num_clean) { + pr_debug("Still missing %u purged frames\n", num_clean - total); scan_used = true; goto purge_list; } -finished: - if (!clean_used) { - pr_debug("Finished scanning for grants to clean, removing used flag\n"); - clean_used = true; - goto purge_list; + + if (total) { + ring->persistent_gnt_c -= total; + ring->blkif->vbd.overflow_max_grants = 0; + + /* We can defer this work */ + schedule_work(&ring->persistent_purge_work); + pr_debug("Purged %u/%u\n", num_clean, total); } - ring->persistent_gnt_c -= (total - num_clean); - ring->blkif->vbd.overflow_max_grants = 0; - - /* We can defer this work */ - schedule_work(&ring->persistent_purge_work); - pr_debug("Purged %u/%u\n", (total - num_clean), total); - out: return; } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index ecb35fe8ca8d..7bff72db3b7e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -234,14 +234,9 @@ struct xen_vbd { struct backend_info; /* Number of available flags */ -#define PERSISTENT_GNT_FLAGS_SIZE 2 +#define PERSISTENT_GNT_FLAGS_SIZE 1 /* This persistent grant is currently in use */ #define PERSISTENT_GNT_ACTIVE 0 -/* - * This persistent grant has been used, this flag is set when we remove the - * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently. - */ -#define PERSISTENT_GNT_WAS_ACTIVE 1 /* Number of requests that we can fit in a ring */ #define XEN_BLKIF_REQS_PER_PAGE 32 @@ -250,6 +245,7 @@ struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; + unsigned long last_used; DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); struct rb_node node; struct list_head remove_node; From a46b53672b2c2e3770b38a4abf90d16364d2584b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:11 +0200 Subject: [PATCH 078/242] xen/blkfront: cleanup stale persistent grants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a periodic cleanup function to remove old persistent grants which are no longer in use on the backend side. This avoids starvation in case there are lots of persistent grants for a device which no longer is involved in I/O business. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 94 ++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8986adab9bf5..a2a395f85a41 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq) static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; +static struct delayed_work blkfront_work; +static LIST_HEAD(info_list); /* * Maximum number of segments in indirect requests, the actual value used by @@ -216,6 +219,7 @@ struct blkfront_info /* Save uncomplete reqs and bios for migration. */ struct list_head requests; struct bio_list bio_list; + struct list_head info_list; }; static unsigned int nr_minors; @@ -1759,6 +1763,12 @@ abort_transaction: return err; } +static void free_info(struct blkfront_info *info) +{ + list_del(&info->info_list); + kfree(info); +} + /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1880,7 +1890,10 @@ again: destroy_blkring: blkif_free(info, 0); - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); + dev_set_drvdata(&dev->dev, NULL); return err; @@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev, info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); + mutex_lock(&blkfront_mutex); + list_add(&info->info_list, &info_list); + mutex_unlock(&blkfront_mutex); + return 0; } @@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST) indirect_segments = 0; info->max_indirect_segments = indirect_segments; + + if (info->feature_persistent) { + mutex_lock(&blkfront_mutex); + schedule_delayed_work(&blkfront_work, HZ * 10); + mutex_unlock(&blkfront_mutex); + } } /* @@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) mutex_unlock(&info->mutex); if (!bdev) { - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); return 0; } @@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev) if (info && !bdev->bd_openers) { xlvbd_release_gendisk(info); disk->private_data = NULL; - kfree(info); + mutex_lock(&blkfront_mutex); + free_info(info); + mutex_unlock(&blkfront_mutex); } mutex_unlock(&bdev->bd_mutex); @@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); xlvbd_release_gendisk(info); disk->private_data = NULL; - kfree(info); + free_info(info); } out: @@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = { .is_ready = blkfront_is_ready, }; +static void purge_persistent_grants(struct blkfront_info *info) +{ + unsigned int i; + unsigned long flags; + + for (i = 0; i < info->nr_rings; i++) { + struct blkfront_ring_info *rinfo = &info->rinfo[i]; + struct grant *gnt_list_entry, *tmp; + + spin_lock_irqsave(&rinfo->ring_lock, flags); + + if (rinfo->persistent_gnts_c == 0) { + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + continue; + } + + list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, + node) { + if (gnt_list_entry->gref == GRANT_INVALID_REF || + gnttab_query_foreign_access(gnt_list_entry->gref)) + continue; + + list_del(&gnt_list_entry->node); + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + rinfo->persistent_gnts_c--; + __free_page(gnt_list_entry->page); + kfree(gnt_list_entry); + } + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + } +} + +static void blkfront_delay_work(struct work_struct *work) +{ + struct blkfront_info *info; + bool need_schedule_work = false; + + mutex_lock(&blkfront_mutex); + + list_for_each_entry(info, &info_list, info_list) { + if (info->feature_persistent) { + need_schedule_work = true; + mutex_lock(&info->mutex); + purge_persistent_grants(info); + mutex_unlock(&info->mutex); + } + } + + if (need_schedule_work) + schedule_delayed_work(&blkfront_work, HZ * 10); + + mutex_unlock(&blkfront_mutex); +} + static int __init xlblk_init(void) { int ret; @@ -2650,6 +2732,8 @@ static int __init xlblk_init(void) return -ENODEV; } + INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work); + ret = xenbus_register_frontend(&blkfront_driver); if (ret) { unregister_blkdev(XENVBD_MAJOR, DEV_NAME); @@ -2663,6 +2747,8 @@ module_init(xlblk_init); static void __exit xlblk_exit(void) { + cancel_delayed_work_sync(&blkfront_work); + xenbus_unregister_driver(&blkfront_driver); unregister_blkdev(XENVBD_MAJOR, DEV_NAME); kfree(minors); From 4bcddbae019df2614ea36976ba3d36313f93c6d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:12 +0200 Subject: [PATCH 079/242] xen/blkfront: reorder tests in xlblk_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we don't want pv block devices we should not test parameters for sanity and eventually print out error messages. So test precluding conditions before checking parameters. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a2a395f85a41..a71d817e900d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2708,6 +2708,15 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (!xen_has_pv_disk_devices()) + return -ENODEV; + + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { + pr_warn("xen_blk: can't get major %d with name %s\n", + XENVBD_MAJOR, DEV_NAME); + return -ENODEV; + } + if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -2723,15 +2732,6 @@ static int __init xlblk_init(void) xen_blkif_max_queues = nr_cpus; } - if (!xen_has_pv_disk_devices()) - return -ENODEV; - - if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { - printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", - XENVBD_MAJOR, DEV_NAME); - return -ENODEV; - } - INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work); ret = xenbus_register_frontend(&blkfront_driver); From d77ff24e7fa2258877fa0b87efa06b9a58a37aab Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:13 +0200 Subject: [PATCH 080/242] xen/blkback: move persistent grants flags to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct persistent_gnt flags member is meant to be a bitfield of different flags. There is only PERSISTENT_GNT_ACTIVE flag left, so convert it to a bool named "active". Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 13 ++++++------- drivers/block/xen-blkback/common.h | 7 +------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9eae7b243f68..fd1e19f1a49f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -255,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, } } - bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE); - set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->active = true; /* Add new node and rebalance tree. */ rb_link_node(&(persistent_gnt->node), parent, new); rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts); @@ -280,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, else if (gref > data->gnt) node = node->rb_right; else { - if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { + if (data->active) { pr_alert_ratelimited("requesting a grant already in use\n"); return NULL; } - set_bit(PERSISTENT_GNT_ACTIVE, data->flags); + data->active = true; atomic_inc(&ring->persistent_gnt_in_use); return data; } @@ -295,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, static void put_persistent_gnt(struct xen_blkif_ring *ring, struct persistent_gnt *persistent_gnt) { - if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (!persistent_gnt->active) pr_alert_ratelimited("freeing a grant already unused\n"); persistent_gnt->last_used = jiffies; - clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->active = false; atomic_dec(&ring->persistent_gnt_in_use); } @@ -429,7 +428,7 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); - if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (persistent_gnt->active) continue; if (!scan_used && !persistent_gnt_timeout(persistent_gnt)) continue; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 7bff72db3b7e..2339b8d39c5e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -233,11 +233,6 @@ struct xen_vbd { struct backend_info; -/* Number of available flags */ -#define PERSISTENT_GNT_FLAGS_SIZE 1 -/* This persistent grant is currently in use */ -#define PERSISTENT_GNT_ACTIVE 0 - /* Number of requests that we can fit in a ring */ #define XEN_BLKIF_REQS_PER_PAGE 32 @@ -246,7 +241,7 @@ struct persistent_gnt { grant_ref_t gnt; grant_handle_t handle; unsigned long last_used; - DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); + bool active; struct rb_node node; struct list_head remove_node; }; From 6f2f39ad1a54978394851c05e327419ebeb7227e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 13 Aug 2018 16:01:14 +0200 Subject: [PATCH 081/242] xen/blkback: remove unused pers_gnts_lock from struct xen_blkif_ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pers_gnts_lock isn't being used anywhere. Remove it. Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 2339b8d39c5e..1d3002d773f7 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -269,7 +269,6 @@ struct xen_blkif_ring { wait_queue_head_t pending_free_wq; /* Tree to store persistent grants. */ - spinlock_t pers_gnts_lock; struct rb_root persistent_gnts; unsigned int persistent_gnt_c; atomic_t persistent_gnt_in_use; From 061a5427530633de93ace4ef001b99961984af62 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Aug 2018 10:09:06 -0600 Subject: [PATCH 082/242] blk-wbt: abstract out end IO completion handler Prep patch for calling the handler from a different context, no functional changes in this patch. Tested-by: Agarwal, Anchal Signed-off-by: Jens Axboe --- block/blk-wbt.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 84507d3e9a98..4575b4650370 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb) } } -static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) +static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, + enum wbt_flags wb_acct) { - struct rq_wb *rwb = RQWB(rqos); - struct rq_wait *rqw; int inflight, limit; - if (!(wb_acct & WBT_TRACKED)) - return; - - rqw = get_rq_wait(rwb, wb_acct); inflight = atomic_dec_return(&rqw->inflight); /* @@ -170,6 +165,18 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) } } +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct) +{ + struct rq_wb *rwb = RQWB(rqos); + struct rq_wait *rqw; + + if (!(wb_acct & WBT_TRACKED)) + return; + + rqw = get_rq_wait(rwb, wb_acct); + wbt_rqw_done(rwb, rqw, wb_acct); +} + /* * Called on completion of a request. Note that it's also called when * a request is merged, when the request gets freed. From 38cfb5a45ee013bfab5d1ae4c4738815e744b440 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Aug 2018 10:10:05 -0600 Subject: [PATCH 083/242] blk-wbt: improve waking of tasks We have two potential issues: 1) After commit 2887e41b910b, we only wake one process at the time when we finish an IO. We really want to wake up as many tasks as can queue IO. Before this commit, we woke up everyone, which could cause a thundering herd issue. 2) A task can potentially consume two wakeups, causing us to (in practice) miss a wakeup. Fix both by providing our own wakeup function, which stops __wake_up_common() from waking up more tasks if we fail to get a queueing token. With the strict ordering we have on the wait list, this wakes the right tasks and the right amount of tasks. Based on a patch from Jianchao Wang . Tested-by: Agarwal, Anchal Signed-off-by: Jens Axboe --- block/blk-wbt.c | 65 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 4575b4650370..bfb0d21d19ce 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -161,7 +161,7 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw, int diff = limit - inflight; if (!inflight || diff >= rwb->wb_background / 2) - wake_up(&rqw->wait); + wake_up_all(&rqw->wait); } } @@ -488,6 +488,34 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) return limit; } +struct wbt_wait_data { + struct wait_queue_entry wq; + struct task_struct *task; + struct rq_wb *rwb; + struct rq_wait *rqw; + unsigned long rw; + bool got_token; +}; + +static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up + * loop in __wake_up_common. + */ + if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw))) + return -1; + + data->got_token = true; + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; +} + /* * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. @@ -498,31 +526,52 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, __acquires(lock) { struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); - DECLARE_WAITQUEUE(wait, current); + struct wbt_wait_data data = { + .wq = { + .func = wbt_wake_function, + .entry = LIST_HEAD_INIT(data.wq.entry), + }, + .task = current, + .rwb = rwb, + .rqw = rqw, + .rw = rw, + }; bool has_sleeper; has_sleeper = wq_has_sleeper(&rqw->wait); if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) return; - add_wait_queue_exclusive(&rqw->wait, &wait); + prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { - set_current_state(TASK_UNINTERRUPTIBLE); - - if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) + if (data.got_token) break; + if (!has_sleeper && + rq_wait_inc_below(rqw, get_limit(rwb, rw))) { + finish_wait(&rqw->wait, &data.wq); + + /* + * We raced with wbt_wake_function() getting a token, + * which means we now have two. Put our local token + * and wake anyone else potentially waiting for one. + */ + if (data.got_token) + wbt_rqw_done(rwb, rqw, wb_acct); + break; + } + if (lock) { spin_unlock_irq(lock); io_schedule(); spin_lock_irq(lock); } else io_schedule(); + has_sleeper = false; } while (1); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&rqw->wait, &wait); + finish_wait(&rqw->wait, &data.wq); } static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio) From b2d7a075a1ccef2fb321d595802190c8e9b39004 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 21 Aug 2018 17:37:55 +0200 Subject: [PATCH 084/242] x86/pae: use 64 bit atomic xchg function in native_ptep_get_and_clear Using only 32-bit writes for the pte will result in an intermediate L1TF vulnerable PTE. When running as a Xen PV guest this will at once switch the guest to shadow mode resulting in a loss of performance. Use arch_atomic64_xchg() instead which will perform the requested operation atomically with all 64 bits. Some performance considerations according to: https://software.intel.com/sites/default/files/managed/ad/dc/Intel-Xeon-Scalable-Processor-throughput-latency.pdf The main number should be the latency, as there is no tight loop around native_ptep_get_and_clear(). "lock cmpxchg8b" has a latency of 20 cycles, while "lock xchg" (with a memory operand) isn't mentioned in that document. "lock xadd" (with xadd having 3 cycles less latency than xchg) has a latency of 11, so we can assume a latency of 14 for "lock xchg". Signed-off-by: Juergen Gross Reviewed-by: Thomas Gleixner Reviewed-by: Jan Beulich Tested-by: Jason Andryuk Signed-off-by: Boris Ostrovsky --- arch/x86/include/asm/pgtable-3level.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index a564084c6141..f8b1ad2c3828 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_H #define _ASM_X86_PGTABLE_3LEVEL_H +#include + /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -150,10 +152,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { pte_t res; - /* xchg acts as a barrier before the setting of the high bits */ - res.pte_low = xchg(&ptep->pte_low, 0); - res.pte_high = ptep->pte_high; - ptep->pte_high = 0; + res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); return res; } From 336d139f8718b1336c9d22f0e462611ae1229850 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Aug 2018 16:01:41 +0900 Subject: [PATCH 085/242] mtd: rawnand: denali: do not pass zero maxchips to nand_scan() Commit 49aa76b16676 ("mtd: rawnand: do not execute nand_scan_ident() if maxchips is zero") gave a new meaning for calling nand_scan_ident() with maxchips=0. It is a special usage for some drivers such as docg4, but actually the Denali driver may pass maxchips=0 to nand_scan() when the driver is enabled but no NAND chip is found on the board for some reasons. If nand_scan_with_ids() is called with maxchips=0, nand_scan_ident() is skipped, then nand_set_defaults() is skipped as well. Thus, the driver must set chip->controller beforehand. Otherwise, nand_attach() causes NULL pointer dereference. In fact, the Denali controller knows the number of connected chips before calling nand_scan_ident(); if DEVICE_RESET fails, there is no chip in that chip select. Then, denali_reset_banks() sets the maxchips to the number of detected chips. If no chip is found, maxchips is zero. In this case, there is no point for calling nand_scan() because we know it will fail for sure. Let's make the probe function fail immediately. Fixes: 49aa76b16676 ("mtd: rawnand: do not execute nand_scan_ident() if maxchips is zero") Signed-off-by: Masahiro Yamada Acked-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/denali.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index ca18612c4201..67b2065e7a19 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1338,6 +1338,11 @@ int denali_init(struct denali_nand_info *denali) denali_enable_irq(denali); denali_reset_banks(denali); + if (!denali->max_banks) { + /* Error out earlier if no chip is found for some reasons. */ + ret = -ENODEV; + goto disable_irq; + } denali->active_bank = DENALI_INVALID_BANK; From b0a84beb2e35536839ea289182684528f379b860 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Aug 2018 13:32:12 -0600 Subject: [PATCH 086/242] blk-wbt: remove dead code We already note and mark discard and swap IO from bio_to_wbt_flags(). Signed-off-by: Jens Axboe --- block/blk-wbt.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index bfb0d21d19ce..8e20a0677dcf 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -636,11 +636,6 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) return; } - if (current_is_kswapd()) - flags |= WBT_KSWAPD; - if (bio_op(bio) == REQ_OP_DISCARD) - flags |= WBT_DISCARD; - __wbt_wait(rwb, flags, bio->bi_opf, lock); if (!blk_stat_is_active(rwb->cb)) From fca5d959972c18839f0306f19df2f121623447dd Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Aug 2018 17:14:32 -0400 Subject: [PATCH 087/242] drm/amdgpu: Adjust the VM size based on system memory size v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the VM size based on system memory size between the ASIC-specific limits given by min_vm_size and max_bits. GFXv9 GPUs will keep their default VM size of 256TB (48 bit). Only older GPUs will adjust VM size depending on system memory size. This makes more VM space available for ROCm applications on GFXv8 GPUs that want to map all available VRAM and system memory in their SVM address space. v2: * Clarify comment * Round up memory size before >> 30 * Round up automatic vm_size to power of two Signed-off-by: Felix Kuehling Acked-by: Junwei Zhang Reviewed-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 32 ++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e40ca8676418..d174d50e3bd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2483,28 +2483,52 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size * * @adev: amdgpu_device pointer - * @vm_size: the default vm size if it's set auto + * @min_vm_size: the minimum vm size in GB if it's set auto * @fragment_size_default: Default PTE fragment size * @max_level: max VMPT level * @max_bits: max address space size in bits * */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, unsigned max_bits) { + unsigned int max_size = 1 << (max_bits - 30); + unsigned int vm_size; uint64_t tmp; /* adjust vm size first */ if (amdgpu_vm_size != -1) { - unsigned max_size = 1 << (max_bits - 30); - vm_size = amdgpu_vm_size; if (vm_size > max_size) { dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n", amdgpu_vm_size, max_size); vm_size = max_size; } + } else { + struct sysinfo si; + unsigned int phys_ram_gb; + + /* Optimal VM size depends on the amount of physical + * RAM available. Underlying requirements and + * assumptions: + * + * - Need to map system memory and VRAM from all GPUs + * - VRAM from other GPUs not known here + * - Assume VRAM <= system memory + * - On GFX8 and older, VM space can be segmented for + * different MTYPEs + * - Need to allow room for fragmentation, guard pages etc. + * + * This adds up to a rough guess of system memory x3. + * Round up to power of two to maximize the available + * VM size with the given page table size. + */ + si_meminfo(&si); + phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + + (1 << 30) - 1) >> 30; + vm_size = roundup_pow_of_two( + min(max(phys_ram_gb * 3, min_vm_size), max_size)); } adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 67a15d439ac0..9fa9df0c5e7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -321,7 +321,7 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, unsigned max_bits); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); From 8ef23364b654d44244400d79988e677e504b21ba Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 24 Aug 2018 17:26:23 +0800 Subject: [PATCH 088/242] drm/amdgpu: Enable/disable gfx PG feature in rlc safe mode This is required by gfx hw and can fix the rlc hang when do s3 stree test on Cz/St. Reviewed-by: Alex Deucher Signed-off-by: Hang Zhou Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5cd45210113f..5a9534a82d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5664,6 +5664,11 @@ static int gfx_v8_0_set_powergating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_RLC_SMU_HS | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GFX_DMG)) + adev->gfx.rlc.funcs->enter_safe_mode(adev); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5713,7 +5718,11 @@ static int gfx_v8_0_set_powergating_state(void *handle, default: break; } - + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_RLC_SMU_HS | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GFX_DMG)) + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } From 6d39df146ff12fb5c71634ad135144d5423590ec Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 23 Aug 2018 15:30:45 +0800 Subject: [PATCH 089/242] drm/amdgpu: Fix vce initialize failed on Kaveri/Mullins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forgot to add vce pg support via smu for Kaveri/Mullins. Fixes: 561a5c83eadd ("drm/amd/pp: Unify powergate_uvd/vce/mmhub to set_powergating_by_smu") v2: refine patch descriptions suggested by Michel Reviewed-by: Alex Deucher Tested-by: Michel Dänzer Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 41 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 3f57f6463dc8..a713c8b6e09c 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -66,7 +66,6 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev, static int kv_init_fps_limits(struct amdgpu_device *adev); static void kv_dpm_powergate_uvd(void *handle, bool gate); -static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate); static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate); static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate); @@ -1374,6 +1373,8 @@ static int kv_dpm_enable(struct amdgpu_device *adev) static void kv_dpm_disable(struct amdgpu_device *adev) { + struct kv_power_info *pi = kv_get_pi(adev); + amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, AMDGPU_THERMAL_IRQ_LOW_TO_HIGH); amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq, @@ -1387,7 +1388,8 @@ static void kv_dpm_disable(struct amdgpu_device *adev) /* powerup blocks */ kv_dpm_powergate_acp(adev, false); kv_dpm_powergate_samu(adev, false); - kv_dpm_powergate_vce(adev, false); + if (pi->caps_vce_pg) /* power on the VCE block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); kv_dpm_powergate_uvd(adev, false); kv_enable_smc_cac(adev, false); @@ -1551,7 +1553,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev, int ret; if (amdgpu_new_state->evclk > 0 && amdgpu_current_state->evclk == 0) { - kv_dpm_powergate_vce(adev, false); if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1573,7 +1574,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev, kv_enable_vce_dpm(adev, true); } else if (amdgpu_new_state->evclk == 0 && amdgpu_current_state->evclk > 0) { kv_enable_vce_dpm(adev, false); - kv_dpm_powergate_vce(adev, true); } return 0; @@ -1702,24 +1702,32 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate) } } -static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate) +static void kv_dpm_powergate_vce(void *handle, bool gate) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct kv_power_info *pi = kv_get_pi(adev); - - if (pi->vce_power_gated == gate) - return; + int ret; pi->vce_power_gated = gate; - if (!pi->caps_vce_pg) - return; - - if (gate) - amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF); - else - amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); + if (gate) { + /* stop the VCE block */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + kv_enable_vce_dpm(adev, false); + if (pi->caps_vce_pg) /* power off the VCE block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF); + } else { + if (pi->caps_vce_pg) /* power on the VCE block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); + kv_enable_vce_dpm(adev, true); + /* re-init the VCE block */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); + } } + static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate) { struct kv_power_info *pi = kv_get_pi(adev); @@ -3313,6 +3321,9 @@ static int kv_set_powergating_by_smu(void *handle, case AMD_IP_BLOCK_TYPE_UVD: kv_dpm_powergate_uvd(handle, gate); break; + case AMD_IP_BLOCK_TYPE_VCE: + kv_dpm_powergate_vce(handle, gate); + break; default: break; } From 2ab4d0e74256fc49b7b270f63c1d1e47c2455abc Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 24 Aug 2018 16:17:54 +0800 Subject: [PATCH 090/242] drm/amdgpu: Update power state at the end of smu hw_init. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For SI/Kv, the power state is managed by function amdgpu_pm_compute_clocks. when dpm enabled, we should call amdgpu_pm_compute_clocks to update current power state instand of set boot state. this change can fix the oops when kfd driver was enabled on Kv. Reviewed-by: Alex Deucher Tested-by: Michel Dänzer Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 4 +--- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index a713c8b6e09c..b497c37cef7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1353,8 +1353,6 @@ static int kv_dpm_enable(struct amdgpu_device *adev) return ret; } - kv_update_current_ps(adev, adev->pm.dpm.boot_ps); - if (adev->irq.installed && amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) { ret = kv_set_thermal_temperature_range(adev, KV_TEMP_RANGE_MIN, KV_TEMP_RANGE_MAX); @@ -3069,7 +3067,7 @@ static int kv_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; mutex_unlock(&adev->pm.mutex); - + amdgpu_pm_compute_clocks(adev); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index db327b412562..1de96995e690 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -6887,7 +6887,6 @@ static int si_dpm_enable(struct amdgpu_device *adev) si_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true); si_thermal_start_thermal_controller(adev); - ni_update_current_ps(adev, boot_ps); return 0; } @@ -7763,7 +7762,7 @@ static int si_dpm_hw_init(void *handle) else adev->pm.dpm_enabled = true; mutex_unlock(&adev->pm.mutex); - + amdgpu_pm_compute_clocks(adev); return ret; } From 72ef23de207bad349ddc648296f330e176ac175b Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 23 Aug 2018 15:41:57 +0800 Subject: [PATCH 091/242] drm/amdgpu: Power on uvd block when hw_fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when hw_fini/suspend, smu only need to power on uvd block if uvd pg is supported, don't need to call uvd to do hw_init. v2: fix typo in patch descriptions and comments. Reviewed-by: Alex Deucher Tested-by: Michel Dänzer Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index b497c37cef7e..cb79a93c2eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -65,7 +65,6 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev, int min_temp, int max_temp); static int kv_init_fps_limits(struct amdgpu_device *adev); -static void kv_dpm_powergate_uvd(void *handle, bool gate); static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate); static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate); @@ -1388,7 +1387,8 @@ static void kv_dpm_disable(struct amdgpu_device *adev) kv_dpm_powergate_samu(adev, false); if (pi->caps_vce_pg) /* power on the VCE block */ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON); - kv_dpm_powergate_uvd(adev, false); + if (pi->caps_uvd_pg) /* power on the UVD block */ + amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_UVDPowerON); kv_enable_smc_cac(adev, false); kv_enable_didt(adev, false); From 4a2de54dc1d7668fa364d8483420ba64b120963b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 24 Aug 2018 14:48:02 +0200 Subject: [PATCH 092/242] drm/amdgpu: fix holding mn_lock while allocating memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can't hold the mn_lock while allocating memory. Signed-off-by: Christian König Acked-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 31 ++++++++++++++++---------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 09703c87d676..b6e9df11115d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1203,26 +1203,24 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int r; + job = p->job; + p->job = NULL; + + r = drm_sched_job_init(&job->base, entity, p->filp); + if (r) + goto error_unlock; + + /* No memory allocation is allowed while holding the mn lock */ amdgpu_mn_lock(p->mn); amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = e->robj; if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - amdgpu_mn_unlock(p->mn); - return -ERESTARTSYS; + r = -ERESTARTSYS; + goto error_abort; } } - job = p->job; - p->job = NULL; - - r = drm_sched_job_init(&job->base, entity, p->filp); - if (r) { - amdgpu_job_free(job); - amdgpu_mn_unlock(p->mn); - return r; - } - job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); @@ -1260,6 +1258,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_mn_unlock(p->mn); return 0; + +error_abort: + dma_fence_put(&job->base.s_fence->finished); + job->base.s_fence = NULL; + +error_unlock: + amdgpu_job_free(job); + amdgpu_mn_unlock(p->mn); + return r; } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) From e7603dadd3cc2fa65924d9e8ce0c6f2964866da0 Mon Sep 17 00:00:00 2001 From: SivapiriyanKumarasamy Date: Wed, 15 Aug 2018 16:55:18 -0400 Subject: [PATCH 093/242] drm/amd/display: Fix memory leak caused by missed dc_sink_release [Why] There is currently an intermittent hang from a memory leak in DTN stress testing. It is caused by unfreed memory during driver disable. [How] Do a dc_sink_release in the case that skips it incorrectly. Signed-off-by: SivapiriyanKumarasamy Reviewed-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 567867915d32..37eaf72ace54 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -754,8 +754,12 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) * fail-safe mode */ if (dc_is_hdmi_signal(link->connector_signal) || - dc_is_dvi_signal(link->connector_signal)) + dc_is_dvi_signal(link->connector_signal)) { + if (prev_sink != NULL) + dc_sink_release(prev_sink); + return false; + } default: break; } From 2f4e7db0f7456d8312de88d321b889dbd10c18fd Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 23 Aug 2018 11:46:13 +0800 Subject: [PATCH 094/242] drm/amdgpu: Remove duplicated power source update when ac/dc switch, driver will be notified by acpi event. then the power source will be updated. so don't need to get power source when set power state. Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 8f98629fbe59..7b4e657a95c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1932,14 +1932,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) amdgpu_fence_wait_empty(ring); } - mutex_lock(&adev->pm.mutex); - /* update battery/ac status */ - if (power_supply_is_system_supplied() > 0) - adev->pm.ac_power = true; - else - adev->pm.ac_power = false; - mutex_unlock(&adev->pm.mutex); - if (adev->powerplay.pp_funcs->dispatch_tasks) { if (!amdgpu_device_has_dc_support(adev)) { mutex_lock(&adev->pm.mutex); From 46cb52ad414ac829680d0bb8cc7090ac2b577ca7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 15 Jul 2018 22:09:29 +0200 Subject: [PATCH 095/242] ata: ftide010: Add a quirk for SQ201 The DMA is broken on this specific device for some unknown reason (probably badly designed or plain broken interface electronics) and will only work with PIO. Other users of the same hardware does not have this problem. Add a specific quirk so that this Gemini device gets DMA turned off. Also fix up some code around passing the port information around in probe while we're at it. Signed-off-by: Linus Walleij Signed-off-by: Jens Axboe --- drivers/ata/pata_ftide010.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c index 5d4b72e21161..569a4a662dcd 100644 --- a/drivers/ata/pata_ftide010.c +++ b/drivers/ata/pata_ftide010.c @@ -256,14 +256,12 @@ static struct ata_port_operations pata_ftide010_port_ops = { .qc_issue = ftide010_qc_issue, }; -static struct ata_port_info ftide010_port_info[] = { - { - .flags = ATA_FLAG_SLAVE_POSS, - .mwdma_mask = ATA_MWDMA2, - .udma_mask = ATA_UDMA6, - .pio_mask = ATA_PIO4, - .port_ops = &pata_ftide010_port_ops, - }, +static struct ata_port_info ftide010_port_info = { + .flags = ATA_FLAG_SLAVE_POSS, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .pio_mask = ATA_PIO4, + .port_ops = &pata_ftide010_port_ops, }; #if IS_ENABLED(CONFIG_SATA_GEMINI) @@ -349,6 +347,7 @@ static int pata_ftide010_gemini_cable_detect(struct ata_port *ap) } static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { struct device *dev = ftide->dev; @@ -373,7 +372,13 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, /* Flag port as SATA-capable */ if (gemini_sata_bridge_enabled(sg, is_ata1)) - ftide010_port_info[0].flags |= ATA_FLAG_SATA; + pi->flags |= ATA_FLAG_SATA; + + /* This device has broken DMA, only PIO works */ + if (of_machine_is_compatible("itian,sq201")) { + pi->mwdma_mask = 0; + pi->udma_mask = 0; + } /* * We assume that a simple 40-wire cable is used in the PATA mode. @@ -435,6 +440,7 @@ static int pata_ftide010_gemini_init(struct ftide010 *ftide, } #else static int pata_ftide010_gemini_init(struct ftide010 *ftide, + struct ata_port_info *pi, bool is_ata1) { return -ENOTSUPP; @@ -446,7 +452,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - const struct ata_port_info pi = ftide010_port_info[0]; + struct ata_port_info pi = ftide010_port_info; const struct ata_port_info *ppi[] = { &pi, NULL }; struct ftide010 *ftide; struct resource *res; @@ -490,6 +496,7 @@ static int pata_ftide010_probe(struct platform_device *pdev) * are ATA0. This will also set up the cable types. */ ret = pata_ftide010_gemini_init(ftide, + &pi, (res->start == 0x63400000)); if (ret) goto err_dis_clk; From bab1be79a5169ac748d8292b20c86d874022d7ba Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Aug 2018 18:38:31 +0800 Subject: [PATCH 096/242] sctp: hold transport before accessing its asoc in sctp_transport_get_next As Marcelo noticed, in sctp_transport_get_next, it is iterating over transports but then also accessing the association directly, without checking any refcnts before that, which can cause an use-after-free Read. So fix it by holding transport before accessing the association. With that, sctp_transport_hold calls can be removed in the later places. Fixes: 626d16f50f39 ("sctp: export some apis or variables for sctp_diag and reuse some for proc") Reported-by: syzbot+fe62a0c9aa6a85c6de16@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/proc.c | 4 ---- net/sctp/socket.c | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef5c9a82d4e8..4d6f1c8d6659 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -264,8 +264,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) } transport = (struct sctp_transport *)v; - if (!sctp_transport_hold(transport)) - return 0; assoc = transport->asoc; epb = &assoc->base; sk = epb->sk; @@ -322,8 +320,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) } transport = (struct sctp_transport *)v; - if (!sctp_transport_hold(transport)) - return 0; assoc = transport->asoc; list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e96b15a66aba..aa76586a1a1c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5005,9 +5005,14 @@ struct sctp_transport *sctp_transport_get_next(struct net *net, break; } + if (!sctp_transport_hold(t)) + continue; + if (net_eq(sock_net(t->asoc->base.sk), net) && t->asoc->peer.primary_path == t) break; + + sctp_transport_put(t); } return t; @@ -5017,13 +5022,18 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos) { - void *obj = SEQ_START_TOKEN; + struct sctp_transport *t; - while (pos && (obj = sctp_transport_get_next(net, iter)) && - !IS_ERR(obj)) - pos--; + if (!pos) + return SEQ_START_TOKEN; - return obj; + while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) { + if (!--pos) + break; + sctp_transport_put(t); + } + + return t; } int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), @@ -5082,8 +5092,6 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - if (!sctp_transport_hold(tsp)) - continue; ret = cb(tsp, p); if (ret) break; From 834539e69a5fe2aab33cc777ccfd4a4fcc5b9770 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Aug 2018 18:40:18 +0800 Subject: [PATCH 097/242] sctp: remove useless start_fail from sctp_ht_iter in proc After changing rhashtable_walk_start to return void, start_fail would never be set other value than 0, and the checking for start_fail is pointless, so remove it. Fixes: 97a6ec4ac021 ("rhashtable: Change rhashtable_walk_start to return void") Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/proc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4d6f1c8d6659..a644292f9faf 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -215,7 +215,6 @@ static const struct seq_operations sctp_eps_ops = { struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; - int start_fail; }; static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) @@ -224,7 +223,6 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) sctp_transport_walk_start(&iter->hti); - iter->start_fail = 0; return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } @@ -232,8 +230,6 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; - if (iter->start_fail) - return; sctp_transport_walk_stop(&iter->hti); } From 84581bdae9587023cea1d139523f0ef0f28bd88d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Aug 2018 18:41:32 +0800 Subject: [PATCH 098/242] erspan: set erspan_ver to 1 by default when adding an erspan dev After erspan_ver is introudced, if erspan_ver is not set in iproute, its value will be left 0 by default. Since Commit 02f99df1875c ("erspan: fix invalid erspan version."), it has broken the traffic due to the version check in erspan_xmit if users are not aware of 'erspan_ver' param, like using an old version of iproute. To fix this compatibility problem, it sets erspan_ver to 1 by default when adding an erspan dev in erspan_setup. Note that we can't do it in ipgre_netlink_parms, as this function is also used by ipgre_changelink. Fixes: 02f99df1875c ("erspan: fix invalid erspan version.") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 3 +++ net/ipv6/ip6_gre.c | 1 + 2 files changed, 4 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 51a5d06085ac..ae714aecc31c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1508,11 +1508,14 @@ nla_put_failure: static void erspan_setup(struct net_device *dev) { + struct ip_tunnel *t = netdev_priv(dev); + ether_setup(dev); dev->netdev_ops = &erspan_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip_tunnel_setup(dev, erspan_net_id); + t->erspan_ver = 1; } static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 18a3794b0f52..e493b041d4ac 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1778,6 +1778,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_COLLECT_METADATA]) parms->collect_md = true; + parms->erspan_ver = 1; if (data[IFLA_GRE_ERSPAN_VER]) parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); From d5ed72a55bc0a321ef33d272e2b0bf0d2b06d1fe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 27 Aug 2018 20:58:43 +0200 Subject: [PATCH 099/242] net: sched: fix extack error message when chain is failed to be created Instead "Cannot find" say "Cannot create". Fixes: c35a4acc2985 ("net: sched: cls_api: handle generic cls errors") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 31bd1439cf60..2d41c5b21b48 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1252,7 +1252,7 @@ replay: } chain = tcf_chain_get(block, chain_index, true); if (!chain) { - NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); + NL_SET_ERR_MSG(extack, "Cannot create specified filter chain"); err = -ENOMEM; goto errout; } From b7b4247d553939ccf02ff597ec60f41a2f93ee8e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 27 Aug 2018 20:58:44 +0200 Subject: [PATCH 100/242] net: sched: return -ENOENT when trying to remove filter from non-existent chain When chain 0 was implicitly created, removal of non-existent filter from chain 0 gave -ENOENT. Once chain 0 became non-implicit, the same call is giving -EINVAL. Fix this by returning -ENOENT in that case. Reported-by: Roman Mashak Fixes: f71e0ca4db18 ("net: sched: Avoid implicit chain 0 creation") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2d41c5b21b48..1a67af8a6e8c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1399,7 +1399,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - err = -EINVAL; + err = -ENOENT; goto errout; } From 30935198b7d0be12b1c45c328b66a7fdefb16256 Mon Sep 17 00:00:00 2001 From: Haiqing Bai Date: Mon, 27 Aug 2018 09:32:26 +0800 Subject: [PATCH 101/242] tipc: fix the big/little endian issue in tipc_dest In function tipc_dest_push, the 32bit variables 'node' and 'port' are stored separately in uppper and lower part of 64bit 'value'. Then this value is assigned to dst->value which is a union like: union { struct { u32 port; u32 node; }; u64 value; } This works on little-endian machines like x86 but fails on big-endian machines. The fix remove the 'value' stack parameter and even the 'value' member of the union in tipc_dest, assign the 'node' and 'port' member directly with the input parameter to avoid the endian issue. Fixes: a80ae5306a73 ("tipc: improve destination linked list") Signed-off-by: Zhenbo Gao Acked-by: Jon Maloy Signed-off-by: Haiqing Bai Signed-off-by: David S. Miller --- net/tipc/name_table.c | 10 ++++------ net/tipc/name_table.h | 9 ++------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 88f027b502f6..66d5b2c5987a 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -980,20 +980,17 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port) { - u64 value = (u64)node << 32 | port; struct tipc_dest *dst; list_for_each_entry(dst, l, list) { - if (dst->value != value) - continue; - return dst; + if (dst->node == node && dst->port == port) + return dst; } return NULL; } bool tipc_dest_push(struct list_head *l, u32 node, u32 port) { - u64 value = (u64)node << 32 | port; struct tipc_dest *dst; if (tipc_dest_find(l, node, port)) @@ -1002,7 +999,8 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port) dst = kmalloc(sizeof(*dst), GFP_ATOMIC); if (unlikely(!dst)) return false; - dst->value = value; + dst->node = node; + dst->port = port; list_add(&dst->list, l); return true; } diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 0febba41da86..892bd750b85f 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -133,13 +133,8 @@ void tipc_nametbl_stop(struct net *net); struct tipc_dest { struct list_head list; - union { - struct { - u32 port; - u32 node; - }; - u64 value; - }; + u32 port; + u32 node; }; struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port); From ad8619864f0c9bd89e14d957afa3fd8aaf0720da Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Aug 2018 00:20:11 +0200 Subject: [PATCH 102/242] net: dsa: Drop GPIO includes Commit 52638f71fcff ("dsa: Move gpio reset into switch driver") moved the GPIO handling into the switch drivers but forgot to remove the GPIO header includes. Signed-off-by: Linus Walleij Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e63c554e0623..9f3209ff7ffd 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -19,12 +19,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include "dsa_priv.h" From 53ae914d898e5dd5984d352d5fa0b23410f966a0 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 25 Aug 2018 15:19:05 +0800 Subject: [PATCH 103/242] net/rds: Use rdma_read_gids to get connection SGID/DGID in IPv6 In IPv4, the newly introduced rdma_read_gids is used to read the SGID/DGID for the connection which returns GID correctly for RoCE transport as well. In IPv6, rdma_read_gids is also used. The following are why rdma_read_gids is introduced. rdma_addr_get_dgid() for RoCE for client side connections returns MAC address, instead of DGID. rdma_addr_get_sgid() for RoCE doesn't return correct SGID for IPv6 and when more than one IP address is assigned to the netdevice. So the transport agnostic rdma_read_gids() API is provided by rdma_cm module. Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index c1d97640c0be..eba75c1ba359 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -341,15 +341,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; - struct rdma_dev_addr *dev_addr; ic = conn->c_transport_data; - dev_addr = &ic->i_cm_id->route.addr.dev_addr; - rdma_addr_get_sgid(dev_addr, - (union ib_gid *)&iinfo6->src_gid); - rdma_addr_get_dgid(dev_addr, - (union ib_gid *)&iinfo6->dst_gid); - + rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, + (union ib_gid *)&iinfo6->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo6->max_send_wr = ic->i_send_ring.w_nr; iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; From 62d2a1940709198a522a43ff8be8b8f6b3654dec Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 28 Aug 2018 07:31:11 +0800 Subject: [PATCH 104/242] block: remove unnecessary condition check kmem_cache_destroy() can handle NULL pointer correctly, so there is no need to check e->icq_cache before calling kmem_cache_destroy(). Signed-off-by: Chengguang Xu Signed-off-by: Jens Axboe --- block/elevator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 5ea6e7d600e4..6a06b5d040e5 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -895,8 +895,7 @@ int elv_register(struct elevator_type *e) spin_lock(&elv_list_lock); if (elevator_find(e->elevator_name, e->uses_mq)) { spin_unlock(&elv_list_lock); - if (e->icq_cache) - kmem_cache_destroy(e->icq_cache); + kmem_cache_destroy(e->icq_cache); return -EBUSY; } list_add_tail(&e->list, &elv_list); From db193954ed9e35701b6e489fa4cc97b08589341b Mon Sep 17 00:00:00 2001 From: John Pittman Date: Mon, 27 Aug 2018 14:33:05 -0400 Subject: [PATCH 105/242] block: bsg: move atomic_t ref_count variable to refcount API Currently, variable ref_count within the bsg_device struct is of type atomic_t. For variables being used as reference counters, the refcount API should be used instead of atomic. The newer refcount API works to prevent counter overflows and use-after-free bugs. So, move this varable from the atomic API to refcount, potentially avoiding the issues mentioned. Signed-off-by: John Pittman Signed-off-by: Jens Axboe --- block/bsg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index db588add6ba6..9a442c23a715 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -37,7 +37,7 @@ struct bsg_device { struct request_queue *queue; spinlock_t lock; struct hlist_node dev_list; - atomic_t ref_count; + refcount_t ref_count; char name[20]; int max_queue; }; @@ -252,7 +252,7 @@ static int bsg_put_device(struct bsg_device *bd) mutex_lock(&bsg_mutex); - if (!atomic_dec_and_test(&bd->ref_count)) { + if (!refcount_dec_and_test(&bd->ref_count)) { mutex_unlock(&bsg_mutex); return 0; } @@ -290,7 +290,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, bd->queue = rq; - atomic_set(&bd->ref_count, 1); + refcount_set(&bd->ref_count, 1); hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); @@ -308,7 +308,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { if (bd->queue == q) { - atomic_inc(&bd->ref_count); + refcount_inc(&bd->ref_count); goto found; } } From e06fa9c16ce4b740996189fa5610eabcee734e6c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 24 Aug 2018 22:08:50 +0200 Subject: [PATCH 106/242] bpf, sockmap: fix potential use after free in bpf_tcp_close bpf_tcp_close() we pop the psock linkage to a map via psock_map_pop(). A parallel update on the sock hash map can happen between psock_map_pop() and lookup_elem_raw() where we override the element under link->hash / link->key. In bpf_tcp_close()'s lookup_elem_raw() we subsequently only test whether an element is present, but we do not test whether the element is infact the element we were looking for. We lock the sock in bpf_tcp_close() during that time, so do we hold the lock in sock_hash_update_elem(). However, the latter locks the sock which is newly updated, not the one we're purging from the hash table. This means that while one CPU is doing the lookup from bpf_tcp_close(), another CPU is doing the map update in parallel, dropped our sock from the hlist and released the psock. Subsequently the first CPU will find the new sock and attempts to drop and release the old sock yet another time. Fix is that we need to check the elements for a match after lookup, similar as we do in the sock map. Note that the hash tab elems are freed via RCU, so access to their link->hash / link->key is fine since we're under RCU read side there. Fixes: e9db4ef6bf4c ("bpf: sockhash fix omitted bucket lock in sock_close") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index cf5195c7c331..01879e4d599a 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -369,7 +369,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout) /* If another thread deleted this object skip deletion. * The refcnt on psock may or may not be zero. */ - if (l) { + if (l && l == link) { hlist_del_rcu(&link->hash_node); smap_release_sock(psock, link->sk); free_htab_elem(htab, link); From 15c480efab01197c965ce0562a43ffedd852b8f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 24 Aug 2018 22:08:51 +0200 Subject: [PATCH 107/242] bpf, sockmap: fix psock refcount leak in bpf_tcp_recvmsg In bpf_tcp_recvmsg() we first took a reference on the psock, however once we find that there are skbs in the normal socket's receive queue we return with processing them through tcp_recvmsg(). Problem is that we leak the taken reference on the psock in that path. Given we don't really do anything with the psock at this point, move the skb_queue_empty() test before we fetch the psock to fix this case. Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 01879e4d599a..26d8a3053407 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -912,6 +912,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (!skb_queue_empty(&sk->sk_receive_queue)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); rcu_read_lock(); psock = smap_psock_sk(sk); @@ -922,9 +924,6 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; rcu_read_unlock(); - if (!skb_queue_empty(&sk->sk_receive_queue)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - lock_sock(sk); bytes_ready: while (copied != len) { From 3f6e138d41ddff196f452993528cfe75762ede0f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 27 Aug 2018 21:30:42 +0200 Subject: [PATCH 108/242] bpf: fix build error with clang Building the newly introduced BPF_PROG_TYPE_SK_REUSEPORT leads to a compile time error when building with clang: net/core/filter.o: In function `sk_reuseport_convert_ctx_access': ../net/core/filter.c:7284: undefined reference to `__compiletime_assert_7284' It seems that clang has issues resolving hweight_long at compile time. Since SK_FL_PROTO_MASK is a constant, we can use the interface for known constant arguments which works fine with clang. Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Stefan Agner Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index c25eb36f1320..7a2430945c71 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7281,7 +7281,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct sk_reuseport_md, ip_protocol): - BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE); + BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset, BPF_W, 0); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); From f1ed3df20d2d223e0852cc4ac1f19bba869a7e3c Mon Sep 17 00:00:00 2001 From: Michal Wnukowski Date: Wed, 15 Aug 2018 15:51:57 -0700 Subject: [PATCH 109/242] nvme-pci: add a memory barrier to nvme_dbbuf_update_and_check_event In many architectures loads may be reordered with older stores to different locations. In the nvme driver the following two operations could be reordered: - Write shadow doorbell (dbbuf_db) into memory. - Read EventIdx (dbbuf_ei) from memory. This can result in a potential race condition between driver and VM host processing requests (if given virtual NVMe controller has a support for shadow doorbell). If that occurs, then the NVMe controller may decide to wait for MMIO doorbell from guest operating system, and guest driver may decide not to issue MMIO doorbell on any of subsequent commands. This issue is purely timing-dependent one, so there is no easy way to reproduce it. Currently the easiest known approach is to run "Oracle IO Numbers" (orion) that is shipped with Oracle DB: orion -run advanced -num_large 0 -size_small 8 -type rand -simulate \ concat -write 40 -duration 120 -matrix row -testname nvme_test Where nvme_test is a .lun file that contains a list of NVMe block devices to run test against. Limiting number of vCPUs assigned to given VM instance seems to increase chances for this bug to occur. On test environment with VM that got 4 NVMe drives and 1 vCPU assigned the virtual NVMe controller hang could be observed within 10-20 minutes. That correspond to about 400-500k IO operations processed (or about 100GB of IO read/writes). Orion tool was used as a validation and set to run in a loop for 36 hours (equivalent of pushing 550M IO operations). No issues were observed. That suggest that the patch fixes the issue. Fixes: f9f38e33389c ("nvme: improve performance for virtual NVMe devices") Signed-off-by: Michal Wnukowski Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg [hch: updated changelog and comment a bit] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b9951d2067e..d668682f91df 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -316,6 +316,14 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, old_value = *dbbuf_db; *dbbuf_db = value; + /* + * Ensure that the doorbell is updated before reading the event + * index from memory. The controller needs to provide similar + * ordering to ensure the envent index is updated before reading + * the doorbell. + */ + mb(); + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) return false; } From afd299ca996929f4f98ac20da0044c0cdc124879 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 9 Aug 2018 16:00:14 -0700 Subject: [PATCH 110/242] nvme-fcloop: Fix dropped LS's to removed target port When a targetport is removed from the config, fcloop will avoid calling the LS done() routine thinking the targetport is gone. This leaves the initiator reset/reconnect hanging as it waits for a status on the Create_Association LS for the reconnect. Change the filter in the LS callback path. If tport null (set when failed validation before "sending to remote port"), be sure to call done. This was the main bug. But, continue the logic that only calls done if tport was set but there is no remoteport (e.g. case where remoteport has been removed, thus host doesn't expect a completion). Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 34712def81b1..5251689a1d9a 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -311,7 +311,7 @@ fcloop_tgt_lsrqst_done_work(struct work_struct *work) struct fcloop_tport *tport = tls_req->tport; struct nvmefc_ls_req *lsreq = tls_req->lsreq; - if (tport->remoteport) + if (!tport || tport->remoteport) lsreq->done(lsreq, tls_req->status); } @@ -329,6 +329,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; + tls_req->tport = NULL; schedule_work(&tls_req->work); return ret; } From 04db0e5ec58167364a80fd33ddb4f3b67434eb85 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 15 Aug 2018 18:48:25 -0700 Subject: [PATCH 111/242] nvmet: free workqueue object if module init fails Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ebf3e7a6c49e..b5ec96abd048 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1210,7 +1210,7 @@ static int __init nvmet_init(void) error = nvmet_init_discovery(); if (error) - goto out; + goto out_free_work_queue; error = nvmet_init_configfs(); if (error) @@ -1219,6 +1219,8 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); +out_free_work_queue: + destroy_workqueue(buffered_io_wq); out: return error; } From 501ca81760c204ec59b73e4a00bee5971fc0f1b1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 24 Aug 2018 17:37:00 -0700 Subject: [PATCH 112/242] bpf: sockmap, decrement copied count correctly in redirect error case Currently, when a redirect occurs in sockmap and an error occurs in the redirect call we unwind the scatterlist once in the error path of bpf_tcp_sendmsg_do_redirect() and then again in sendmsg(). Then in the error path of sendmsg we decrement the copied count by the send size. However, its possible we partially sent data before the error was generated. This can happen if do_tcp_sendpages() partially sends the scatterlist before encountering a memory pressure error. If this happens we need to decrement the copied value (the value tracking how many bytes were actually sent to TCP stack) by the number of remaining bytes _not_ the entire send size. Otherwise we risk confusing userspace. Also we don't need two calls to free the scatterlist one is good enough. So remove the one in bpf_tcp_sendmsg_do_redirect() and then properly reduce copied by the number of remaining bytes which may in fact be the entire send size if no bytes were sent. To do this use bool to indicate if free_start_sg() should do mem accounting or not. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 26d8a3053407..ce63e5801746 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -236,7 +236,7 @@ static int bpf_tcp_init(struct sock *sk) } static void smap_release_sock(struct smap_psock *psock, struct sock *sock); -static int free_start_sg(struct sock *sk, struct sk_msg_buff *md); +static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge); static void bpf_tcp_release(struct sock *sk) { @@ -248,7 +248,7 @@ static void bpf_tcp_release(struct sock *sk) goto out; if (psock->cork) { - free_start_sg(psock->sock, psock->cork); + free_start_sg(psock->sock, psock->cork, true); kfree(psock->cork); psock->cork = NULL; } @@ -330,14 +330,14 @@ static void bpf_tcp_close(struct sock *sk, long timeout) close_fun = psock->save_close; if (psock->cork) { - free_start_sg(psock->sock, psock->cork); + free_start_sg(psock->sock, psock->cork, true); kfree(psock->cork); psock->cork = NULL; } list_for_each_entry_safe(md, mtmp, &psock->ingress, list) { list_del(&md->list); - free_start_sg(psock->sock, md); + free_start_sg(psock->sock, md, true); kfree(md); } @@ -570,14 +570,16 @@ static void free_bytes_sg(struct sock *sk, int bytes, md->sg_start = i; } -static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) +static int free_sg(struct sock *sk, int start, + struct sk_msg_buff *md, bool charge) { struct scatterlist *sg = md->sg_data; int i = start, free = 0; while (sg[i].length) { free += sg[i].length; - sk_mem_uncharge(sk, sg[i].length); + if (charge) + sk_mem_uncharge(sk, sg[i].length); if (!md->skb) put_page(sg_page(&sg[i])); sg[i].length = 0; @@ -594,9 +596,9 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) return free; } -static int free_start_sg(struct sock *sk, struct sk_msg_buff *md) +static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge) { - int free = free_sg(sk, md->sg_start, md); + int free = free_sg(sk, md->sg_start, md, charge); md->sg_start = md->sg_end; return free; @@ -604,7 +606,7 @@ static int free_start_sg(struct sock *sk, struct sk_msg_buff *md) static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md) { - return free_sg(sk, md->sg_curr, md); + return free_sg(sk, md->sg_curr, md, true); } static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md) @@ -718,7 +720,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, list_add_tail(&r->list, &psock->ingress); sk->sk_data_ready(sk); } else { - free_start_sg(sk, r); + free_start_sg(sk, r, true); kfree(r); } @@ -752,14 +754,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, release_sock(sk); } smap_release_sock(psock, sk); - if (unlikely(err)) - goto out; - return 0; + return err; out_rcu: rcu_read_unlock(); -out: - free_bytes_sg(NULL, send, md, false); - return err; + return 0; } static inline void bpf_md_init(struct smap_psock *psock) @@ -822,7 +820,7 @@ more_data: case __SK_PASS: err = bpf_tcp_push(sk, send, m, flags, true); if (unlikely(err)) { - *copied -= free_start_sg(sk, m); + *copied -= free_start_sg(sk, m, true); break; } @@ -845,16 +843,17 @@ more_data: lock_sock(sk); if (unlikely(err < 0)) { - free_start_sg(sk, m); + int free = free_start_sg(sk, m, false); + psock->sg_size = 0; if (!cork) - *copied -= send; + *copied -= free; } else { psock->sg_size -= send; } if (cork) { - free_start_sg(sk, m); + free_start_sg(sk, m, true); psock->sg_size = 0; kfree(m); m = NULL; @@ -1121,7 +1120,7 @@ wait_for_memory: err = sk_stream_wait_memory(sk, &timeo); if (err) { if (m && m != psock->cork) - free_start_sg(sk, m); + free_start_sg(sk, m, true); goto out_err; } } @@ -1580,13 +1579,13 @@ static void smap_gc_work(struct work_struct *w) bpf_prog_put(psock->bpf_tx_msg); if (psock->cork) { - free_start_sg(psock->sock, psock->cork); + free_start_sg(psock->sock, psock->cork, true); kfree(psock->cork); } list_for_each_entry_safe(md, mtmp, &psock->ingress, list) { list_del(&md->list); - free_start_sg(psock->sock, md); + free_start_sg(psock->sock, md, true); kfree(md); } From 67d1ba8a6dc83d90cd58b89fa6cbf9ae35a0cf7f Mon Sep 17 00:00:00 2001 From: Danek Duvall Date: Wed, 22 Aug 2018 16:01:04 -0700 Subject: [PATCH 113/242] mac80211: correct use of IEEE80211_VHT_CAP_RXSTBC_X The mod mask for VHT capabilities intends to say that you can override the number of STBC receive streams, and it does, but only by accident. The IEEE80211_VHT_CAP_RXSTBC_X aren't bits to be set, but values (albeit left-shifted). ORing the bits together gets the right answer, but we should use the _MASK macro here instead. Signed-off-by: Danek Duvall Signed-off-by: Johannes Berg --- net/mac80211/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0358f20b675f..27cd64acaf00 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -470,10 +470,7 @@ static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | - IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_RXSTBC_MASK | IEEE80211_VHT_CAP_TXSTBC | IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | From d7c863a2f65e48f442379f4ee1846d52e0c5d24d Mon Sep 17 00:00:00 2001 From: Danek Duvall Date: Wed, 22 Aug 2018 16:01:05 -0700 Subject: [PATCH 114/242] mac80211_hwsim: correct use of IEEE80211_VHT_CAP_RXSTBC_X The mac80211_hwsim driver intends to say that it supports up to four STBC receive streams, but instead it ends up saying something undefined. The IEEE80211_VHT_CAP_RXSTBC_X macros aren't independent bits that can be ORed together, but values. In this case, _4 is the appropriate one to use. Signed-off-by: Danek Duvall Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index fe1b0108f06d..7d0b460868f9 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2699,9 +2699,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_TXSTBC | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | IEEE80211_VHT_CAP_RXSTBC_4 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; sband->vht_cap.vht_mcs.rx_mcs_map = From 38cb87ee47fb825f6c9d645c019f75b3905c0ab2 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 22 Aug 2018 13:52:21 +0200 Subject: [PATCH 115/242] cfg80211: make wmm_rule part of the reg_rule structure Make wmm_rule be part of the reg_rule structure. This simplifies the code a lot at the cost of having bigger memory usage. However in most cases we have only few reg_rule's and when we do have many like in iwlwifi we do not save memory as it allocates a separate wmm_rule for each channel anyway. This also fixes a bug reported in various places where somewhere the pointers were corrupted and we ended up doing a null-dereference. Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Stanislaw Gruszka [rephrase commit message slightly] Signed-off-by: Johannes Berg --- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 50 ++--------- include/net/cfg80211.h | 4 +- include/net/regulatory.h | 4 +- net/mac80211/util.c | 8 +- net/wireless/nl80211.c | 10 +-- net/wireless/reg.c | 90 +++---------------- 6 files changed, 31 insertions(+), 135 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index b815ba38dbdb..88121548eb9f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -877,15 +877,12 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ? iwl_ext_nvm_channels : iwl_nvm_channels; struct ieee80211_regdomain *regd, *copy_rd; - int size_of_regd, regd_to_copy, wmms_to_copy; - int size_of_wmms = 0; + int size_of_regd, regd_to_copy; struct ieee80211_reg_rule *rule; - struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm; struct regdb_ptrs *regdb_ptrs; enum nl80211_band band; int center_freq, prev_center_freq = 0; - int valid_rules = 0, n_wmms = 0; - int i; + int valid_rules = 0; bool new_rule; int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ? IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS; @@ -904,11 +901,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, sizeof(struct ieee80211_regdomain) + num_of_ch * sizeof(struct ieee80211_reg_rule); - if (geo_info & GEO_WMM_ETSI_5GHZ_INFO) - size_of_wmms = - num_of_ch * sizeof(struct ieee80211_wmm_rule); - - regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL); + regd = kzalloc(size_of_regd, GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM); @@ -922,8 +915,6 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, regd->alpha2[0] = fw_mcc >> 8; regd->alpha2[1] = fw_mcc & 0xff; - wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); - for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) { ch_flags = (u16)__le32_to_cpup(channels + ch_idx); band = (ch_idx < NUM_2GHZ_CHANNELS) ? @@ -977,26 +968,10 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, band == NL80211_BAND_2GHZ) continue; - if (!reg_query_regdb_wmm(regd->alpha2, center_freq, - ®db_ptrs[n_wmms].token, wmm_rule)) { - /* Add only new rules */ - for (i = 0; i < n_wmms; i++) { - if (regdb_ptrs[i].token == - regdb_ptrs[n_wmms].token) { - rule->wmm_rule = regdb_ptrs[i].rule; - break; - } - } - if (i == n_wmms) { - rule->wmm_rule = wmm_rule; - regdb_ptrs[n_wmms++].rule = wmm_rule; - wmm_rule++; - } - } + reg_query_regdb_wmm(regd->alpha2, center_freq, rule); } regd->n_reg_rules = valid_rules; - regd->n_wmm_rules = n_wmms; /* * Narrow down regdom for unused regulatory rules to prevent hole @@ -1005,28 +980,13 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, regd_to_copy = sizeof(struct ieee80211_regdomain) + valid_rules * sizeof(struct ieee80211_reg_rule); - wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms; - - copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL); + copy_rd = kzalloc(regd_to_copy, GFP_KERNEL); if (!copy_rd) { copy_rd = ERR_PTR(-ENOMEM); goto out; } memcpy(copy_rd, regd, regd_to_copy); - memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd, - wmms_to_copy); - - d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy); - s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); - - for (i = 0; i < regd->n_reg_rules; i++) { - if (!regd->reg_rules[i].wmm_rule) - continue; - - copy_rd->reg_rules[i].wmm_rule = d_wmm + - (regd->reg_rules[i].wmm_rule - s_wmm); - } out: kfree(regdb_ptrs); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1beb3ead0385..7229c186d199 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4763,8 +4763,8 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * * Return: 0 on success. -ENODATA. */ -int reg_query_regdb_wmm(char *alpha2, int freq, u32 *ptr, - struct ieee80211_wmm_rule *rule); +int reg_query_regdb_wmm(char *alpha2, int freq, + struct ieee80211_reg_rule *rule); /* * callbacks for asynchronous cfg80211 methods, notification diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 60f8cc86a447..3469750df0f4 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -217,15 +217,15 @@ struct ieee80211_wmm_rule { struct ieee80211_reg_rule { struct ieee80211_freq_range freq_range; struct ieee80211_power_rule power_rule; - struct ieee80211_wmm_rule *wmm_rule; + struct ieee80211_wmm_rule wmm_rule; u32 flags; u32 dfs_cac_ms; + bool has_wmm; }; struct ieee80211_regdomain { struct rcu_head rcu_head; u32 n_reg_rules; - u32 n_wmm_rules; char alpha2[3]; enum nl80211_dfs_regions dfs_region; struct ieee80211_reg_rule reg_rules[]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d02fbfec3783..c80187d6e6bb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1120,7 +1120,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, { struct ieee80211_chanctx_conf *chanctx_conf; const struct ieee80211_reg_rule *rrule; - struct ieee80211_wmm_ac *wmm_ac; + const struct ieee80211_wmm_ac *wmm_ac; u16 center_freq = 0; if (sdata->vif.type != NL80211_IFTYPE_AP && @@ -1139,15 +1139,15 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, rrule = freq_reg_info(sdata->wdev.wiphy, MHZ_TO_KHZ(center_freq)); - if (IS_ERR_OR_NULL(rrule) || !rrule->wmm_rule) { + if (IS_ERR_OR_NULL(rrule) || !rrule->has_wmm) { rcu_read_unlock(); return; } if (sdata->vif.type == NL80211_IFTYPE_AP) - wmm_ac = &rrule->wmm_rule->ap[ac]; + wmm_ac = &rrule->wmm_rule.ap[ac]; else - wmm_ac = &rrule->wmm_rule->client[ac]; + wmm_ac = &rrule->wmm_rule.client[ac]; qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min); qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max); qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 80bc986c79e5..e3dcffd96919 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -667,13 +667,13 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg, goto nla_put_failure; if (nla_put_u16(msg, NL80211_WMMR_CW_MIN, - rule->wmm_rule->client[j].cw_min) || + rule->wmm_rule.client[j].cw_min) || nla_put_u16(msg, NL80211_WMMR_CW_MAX, - rule->wmm_rule->client[j].cw_max) || + rule->wmm_rule.client[j].cw_max) || nla_put_u8(msg, NL80211_WMMR_AIFSN, - rule->wmm_rule->client[j].aifsn) || + rule->wmm_rule.client[j].aifsn) || nla_put_u8(msg, NL80211_WMMR_TXOP, - rule->wmm_rule->client[j].cot)) + rule->wmm_rule.client[j].cot)) goto nla_put_failure; nla_nest_end(msg, nl_wmm_rule); @@ -766,7 +766,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, const struct ieee80211_reg_rule *rule = freq_reg_info(wiphy, chan->center_freq); - if (!IS_ERR(rule) && rule->wmm_rule) { + if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) { if (nl80211_msg_put_wmm_rules(msg, rule)) goto nla_put_failure; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 283902974fbf..2f702adf2912 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -425,35 +425,23 @@ static const struct ieee80211_regdomain * reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd, size_of_wmms; + int size_of_regd; unsigned int i; - struct ieee80211_wmm_rule *d_wmm, *s_wmm; size_of_regd = sizeof(struct ieee80211_regdomain) + src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); - size_of_wmms = src_regd->n_wmm_rules * - sizeof(struct ieee80211_wmm_rule); - regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL); + regd = kzalloc(size_of_regd, GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM); memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); - d_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); - s_wmm = (struct ieee80211_wmm_rule *)((u8 *)src_regd + size_of_regd); - memcpy(d_wmm, s_wmm, size_of_wmms); - - for (i = 0; i < src_regd->n_reg_rules; i++) { + for (i = 0; i < src_regd->n_reg_rules; i++) memcpy(®d->reg_rules[i], &src_regd->reg_rules[i], sizeof(struct ieee80211_reg_rule)); - if (!src_regd->reg_rules[i].wmm_rule) - continue; - regd->reg_rules[i].wmm_rule = d_wmm + - (src_regd->reg_rules[i].wmm_rule - s_wmm); - } return regd; } @@ -859,9 +847,10 @@ static bool valid_regdb(const u8 *data, unsigned int size) return true; } -static void set_wmm_rule(struct ieee80211_wmm_rule *rule, +static void set_wmm_rule(struct ieee80211_reg_rule *rrule, struct fwdb_wmm_rule *wmm) { + struct ieee80211_wmm_rule *rule = &rrule->wmm_rule; unsigned int i; for (i = 0; i < IEEE80211_NUM_ACS; i++) { @@ -875,11 +864,13 @@ static void set_wmm_rule(struct ieee80211_wmm_rule *rule, rule->ap[i].aifsn = wmm->ap[i].aifsn; rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot); } + + rrule->has_wmm = true; } static int __regdb_query_wmm(const struct fwdb_header *db, const struct fwdb_country *country, int freq, - u32 *dbptr, struct ieee80211_wmm_rule *rule) + struct ieee80211_reg_rule *rule) { unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; struct fwdb_collection *coll = (void *)((u8 *)db + ptr); @@ -900,8 +891,6 @@ static int __regdb_query_wmm(const struct fwdb_header *db, wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2; wmm = (void *)((u8 *)db + wmm_ptr); set_wmm_rule(rule, wmm); - if (dbptr) - *dbptr = wmm_ptr; return 0; } } @@ -909,8 +898,7 @@ static int __regdb_query_wmm(const struct fwdb_header *db, return -ENODATA; } -int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr, - struct ieee80211_wmm_rule *rule) +int reg_query_regdb_wmm(char *alpha2, int freq, struct ieee80211_reg_rule *rule) { const struct fwdb_header *hdr = regdb; const struct fwdb_country *country; @@ -924,8 +912,7 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr, country = &hdr->country[0]; while (country->coll_ptr) { if (alpha2_equal(alpha2, country->alpha2)) - return __regdb_query_wmm(regdb, country, freq, dbptr, - rule); + return __regdb_query_wmm(regdb, country, freq, rule); country++; } @@ -934,32 +921,13 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr, } EXPORT_SYMBOL(reg_query_regdb_wmm); -struct wmm_ptrs { - struct ieee80211_wmm_rule *rule; - u32 ptr; -}; - -static struct ieee80211_wmm_rule *find_wmm_ptr(struct wmm_ptrs *wmm_ptrs, - u32 wmm_ptr, int n_wmms) -{ - int i; - - for (i = 0; i < n_wmms; i++) { - if (wmm_ptrs[i].ptr == wmm_ptr) - return wmm_ptrs[i].rule; - } - return NULL; -} - static int regdb_query_country(const struct fwdb_header *db, const struct fwdb_country *country) { unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; struct fwdb_collection *coll = (void *)((u8 *)db + ptr); struct ieee80211_regdomain *regdom; - struct ieee80211_regdomain *tmp_rd; - unsigned int size_of_regd, i, n_wmms = 0; - struct wmm_ptrs *wmm_ptrs; + unsigned int size_of_regd, i; size_of_regd = sizeof(struct ieee80211_regdomain) + coll->n_rules * sizeof(struct ieee80211_reg_rule); @@ -968,12 +936,6 @@ static int regdb_query_country(const struct fwdb_header *db, if (!regdom) return -ENOMEM; - wmm_ptrs = kcalloc(coll->n_rules, sizeof(*wmm_ptrs), GFP_KERNEL); - if (!wmm_ptrs) { - kfree(regdom); - return -ENOMEM; - } - regdom->n_reg_rules = coll->n_rules; regdom->alpha2[0] = country->alpha2[0]; regdom->alpha2[1] = country->alpha2[1]; @@ -1012,37 +974,11 @@ static int regdb_query_country(const struct fwdb_header *db, 1000 * be16_to_cpu(rule->cac_timeout); if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) { u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2; - struct ieee80211_wmm_rule *wmm_pos = - find_wmm_ptr(wmm_ptrs, wmm_ptr, n_wmms); - struct fwdb_wmm_rule *wmm; - struct ieee80211_wmm_rule *wmm_rule; + struct fwdb_wmm_rule *wmm = (void *)((u8 *)db + wmm_ptr); - if (wmm_pos) { - rrule->wmm_rule = wmm_pos; - continue; - } - wmm = (void *)((u8 *)db + wmm_ptr); - tmp_rd = krealloc(regdom, size_of_regd + (n_wmms + 1) * - sizeof(struct ieee80211_wmm_rule), - GFP_KERNEL); - - if (!tmp_rd) { - kfree(regdom); - kfree(wmm_ptrs); - return -ENOMEM; - } - regdom = tmp_rd; - - wmm_rule = (struct ieee80211_wmm_rule *) - ((u8 *)regdom + size_of_regd + n_wmms * - sizeof(struct ieee80211_wmm_rule)); - - set_wmm_rule(wmm_rule, wmm); - wmm_ptrs[n_wmms].ptr = wmm_ptr; - wmm_ptrs[n_wmms++].rule = wmm_rule; + set_wmm_rule(rrule, wmm); } } - kfree(wmm_ptrs); return reg_schedule_apply(regdom); } From 20932750d9c78d307e4f2f18f9c6a32b82b1e0e8 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Aug 2018 13:56:07 +0300 Subject: [PATCH 116/242] mac80211: don't update the PM state of a peer upon a multicast frame I changed the way mac80211 updates the PM state of the peer. I forgot that we could also have multicast frames from the peer and that those frame should of course not change the PM state of the peer: A peer goes to power save when it needs to scan, but it won't send the broadcast Probe Request with the PM bit set. This made us mark the peer as awake when it wasn't and then Intel's firmware would fail to transmit because the peer is asleep according to its database. The driver warned about this and it looked like this: WARNING: CPU: 0 PID: 184 at /usr/src/linux-4.16.14/drivers/net/wireless/intel/iwlwifi/mvm/tx.c:1369 iwl_mvm_rx_tx_cmd+0x53b/0x860 CPU: 0 PID: 184 Comm: irq/124-iwlwifi Not tainted 4.16.14 #1 RIP: 0010:iwl_mvm_rx_tx_cmd+0x53b/0x860 Call Trace: iwl_pcie_rx_handle+0x220/0x880 iwl_pcie_irq_handler+0x6c9/0xa20 ? irq_forced_thread_fn+0x60/0x60 ? irq_thread_dtor+0x90/0x90 The relevant code that spits the WARNING is: case TX_STATUS_FAIL_DEST_PS: /* the FW should have stopped the queue and not * return this status */ WARN_ON(1); info->flags |= IEEE80211_TX_STAT_TX_FILTERED; This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199967. Fixes: 9fef65443388 ("mac80211: always update the PM state of a peer on MGMT / DATA frames") Cc: #4.16+ Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 932985ca4e66..3f80a5ca4050 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1612,6 +1612,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) */ if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) && !ieee80211_has_morefrags(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1) && (ieee80211_is_mgmt(hdr->frame_control) || ieee80211_is_data(hdr->frame_control)) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && From 3a2af7cccbbaf2362db9053a946a6084e12bfa73 Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Tue, 31 Jul 2018 23:10:40 +0900 Subject: [PATCH 117/242] mac80211_hwsim: Fix possible Spectre-v1 for hwsim_world_regdom_custom User controls @idx which to be used as index of hwsim_world_regdom_custom. So, It can be exploited via Spectre-like attack. (speculative execution) This kind of attack leaks address of hwsim_world_regdom_custom, It leads an attacker to bypass security mechanism such as KASLR. So sanitize @idx before using it to prevent attack. I leveraged strategy [1] to find and exploit this gadget. [1] https://github.com/jinb-park/linux-exploit/tree/master/exploit-remaining-spectre-gadget/ Signed-off-by: Jinbum Park [johannes: unwrap URL] Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7d0b460868f9..80e2c8595c7c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "mac80211_hwsim.h" #define WARN_QUEUE 100 @@ -3229,6 +3230,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) kfree(hwname); return -EINVAL; } + + idx = array_index_nospec(idx, + ARRAY_SIZE(hwsim_world_regdom_custom)); param.regd = hwsim_world_regdom_custom[idx]; } From d3c89bbc7491d5e288ca2993e999d24ba9ff52ad Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 21 Aug 2018 09:22:19 +0300 Subject: [PATCH 118/242] nl80211: Fix nla_put_u8 to u16 for NL80211_WMMR_TXOP TXOP (also known as Channel Occupancy Time) is u16 and should be added using nla_put_u16 instead of u8, fix that. Fixes: 50f32718e125 ("nl80211: Add wmm rule attribute to NL80211_CMD_GET_WIPHY dump command") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3dcffd96919..3f7ffbe6c634 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -672,8 +672,8 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg, rule->wmm_rule.client[j].cw_max) || nla_put_u8(msg, NL80211_WMMR_AIFSN, rule->wmm_rule.client[j].aifsn) || - nla_put_u8(msg, NL80211_WMMR_TXOP, - rule->wmm_rule.client[j].cot)) + nla_put_u16(msg, NL80211_WMMR_TXOP, + rule->wmm_rule.client[j].cot)) goto nla_put_failure; nla_nest_end(msg, nl_wmm_rule); From b88d26d97c41680f7327e5fb8061ad0037877f40 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 21 Aug 2018 09:22:20 +0300 Subject: [PATCH 119/242] nl80211: Pass center frequency in kHz instead of MHz freq_reg_info expects to get the frequency in kHz. Instead we accidently pass it in MHz. Thus, currently the function always return ERR rule. Fix that. Fixes: 50f32718e125 ("nl80211: Add wmm rule attribute to NL80211_CMD_GET_WIPHY dump command") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho [fix kHz/MHz in commit message] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3f7ffbe6c634..ce0149a86c13 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -764,7 +764,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if (large) { const struct ieee80211_reg_rule *rule = - freq_reg_info(wiphy, chan->center_freq); + freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) { if (nl80211_msg_put_wmm_rules(msg, rule)) From 496f3347d834aec91c38b45d6249ed00f58ad233 Mon Sep 17 00:00:00 2001 From: Neeraj Dantu Date: Tue, 28 Aug 2018 16:37:58 +0000 Subject: [PATCH 120/242] ARM: dts: Fix file permission for am335x-osd3358-sm-red.dts Fix wrong mode for dts file added by commit bb3e3fbbac86 ("ARM: dts: Add DT support for Octavo Systems OSD3358-SM-RED based on TI AM335x"). Signed-off-by: Neeraj Dantu CC: Robert Nelson CC: Jason Kridner Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-osd3358-sm-red.dts | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 arch/arm/boot/dts/am335x-osd3358-sm-red.dts diff --git a/arch/arm/boot/dts/am335x-osd3358-sm-red.dts b/arch/arm/boot/dts/am335x-osd3358-sm-red.dts old mode 100755 new mode 100644 From 2d59bb602314a4b2593fde267734266b5e872dd0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 27 Aug 2018 19:18:21 -0700 Subject: [PATCH 121/242] ARM: dts: omap4-droid4: Fix emmc errors seen on some devices Otherwise we can get the following errors occasionally on some devices: mmc1: tried to HW reset card, got error -110 mmcblk1: error -110 requesting status mmcblk1: recovery failed! print_req_error: I/O error, dev mmcblk1, sector 14329 ... I have one device that hits this error almost on every boot, and another one that hits it only rarely with the other ones I've used behave without problems. I'm not sure if the issue is related to a particular eMMC card model, but in case it is, both of the machines with issues have: # cat /sys/class/mmc_host/mmc1/mmc1:0001/manfid \ /sys/class/mmc_host/mmc1/mmc1:0001/oemid \ /sys/class/mmc_host/mmc1/mmc1:0001/name 0x000045 0x0100 SEM16G and the working ones have: 0x000011 0x0100 016G92 Note that "ti,non-removable" is different as omap_hsmmc_reg_get() does not call omap_hsmmc_disable_boot_regulators() if no_regulator_off_init is set. And currently we set no_regulator_off_init only for "ti,non-removable" and not for "non-removable". It seems that we should have "non-removable" with some other mmc generic property behave in the same way instead of having to use a non-generic property. But let's fix the issue first. Fixes: 7e2f8c0ae670 ("ARM: dts: Add minimal support for motorola droid 4 xt894") Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Scott Cc: NeKit Cc: Pavel Machek Cc: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 3c26a4bbc340..04758a2a87f0 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -354,7 +354,7 @@ &mmc2 { vmmc-supply = <&vsdio>; bus-width = <8>; - non-removable; + ti,non-removable; }; &mmc3 { From 6ddd9769db4fc11a98bd7e58be1764e47fdb8384 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Tue, 28 Aug 2018 20:52:40 +0800 Subject: [PATCH 122/242] drm/amdgpu: Need to set moved to true when evict bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the VMC page fault when the running sequence is as below: 1.amdgpu_gem_create_ioctl 2.ttm_bo_swapout->amdgpu_vm_bo_invalidate, as not called amdgpu_vm_bo_base_init, so won't called list_add_tail(&base->bo_list, &bo->va). Even the bo was evicted, it won't set the bo_base->moved. 3.drm_gem_open_ioctl->amdgpu_vm_bo_base_init, here only called list_move_tail(&base->vm_status, &vm->evicted), but not set the bo_base->moved. 4.amdgpu_vm_bo_map->amdgpu_vm_bo_insert_map, as the bo_base->moved is not set true, the function amdgpu_vm_bo_insert_map will call list_move(&bo_va->base.vm_status, &vm->moved) 5.amdgpu_cs_ioctl won't validate the swapout bo, as it is only in the moved list, not in the evict list. So VMC page fault occurs. Signed-off-by: Emily Deng Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d174d50e3bd3..b17771dd5ce7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -172,6 +172,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, * is validated on next vm use to avoid fault. * */ list_move_tail(&base->vm_status, &vm->evicted); + base->moved = true; } /** From 5b2695fd4b20f9b8320e9ecbfc232842bacf5b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 19 Jul 2018 21:21:57 +0300 Subject: [PATCH 123/242] drm/i915: Fix glk/cnl display w/a #1175 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The workaround was supposed to look at the plane destination coordinates. Currently it's looking at some mixture of src and dst coordinates that doesn't make sense. Fix it up. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180719182214.4323-2-ville.syrjala@linux.intel.com Fixes: 394676f05bee (drm/i915: Add WA for planes ending close to left screen edge) Reviewed-by: Imre Deak (cherry picked from commit b1f1c2c11fc6c6cd3e361061e30f9b2839897b28) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_display.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed3fa1c8a983..4a3c8ee9a973 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2988,6 +2988,7 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; int dst_x = plane_state->base.dst.x1; + int dst_w = drm_rect_width(&plane_state->base.dst); int pipe_src_w = crtc_state->pipe_src_w; int max_width = skl_max_plane_width(fb, 0, rotation); int max_height = 4096; @@ -3009,10 +3010,10 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, * screen may cause FIFO underflow and display corruption. */ if ((IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && - (dst_x + w < 4 || dst_x > pipe_src_w - 4)) { + (dst_x + dst_w < 4 || dst_x > pipe_src_w - 4)) { DRM_DEBUG_KMS("requested plane X %s position %d invalid (valid range %d-%d)\n", - dst_x + w < 4 ? "end" : "start", - dst_x + w < 4 ? dst_x + w : dst_x, + dst_x + dst_w < 4 ? "end" : "start", + dst_x + dst_w < 4 ? dst_x + dst_w : dst_x, 4, pipe_src_w - 4); return -ERANGE; } From 1b1b1162745e5f9e5c6c095afc8081df3edabc50 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 23 Aug 2018 13:51:36 -0700 Subject: [PATCH 124/242] drm/i915: Free write_buf that we allocated with kzalloc. We use kzalloc to allocate the write_buf that we use for i2c transfer on hdcp write. But it seems that we are forgetting to free the memory that is not needed after i2c transfer is completed. Reported-by: Brian J Wood Fixes: 2320175feb74 ("drm/i915: Implement HDCP for HDMI") Cc: Ramalingam C Cc: Sean Paul Cc: Jani Nikula Cc: Rodrigo Vivi Cc: # v4.17+ Signed-off-by: Rodrigo Vivi Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180823205136.31310-1-rodrigo.vivi@intel.com (cherry picked from commit 62d3a8deaa10b8346d979d0dabde56c33b742afa) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_hdmi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index a9076402dcb0..192972a7d287 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -943,8 +943,12 @@ static int intel_hdmi_hdcp_write(struct intel_digital_port *intel_dig_port, ret = i2c_transfer(adapter, &msg, 1); if (ret == 1) - return 0; - return ret >= 0 ? -EIO : ret; + ret = 0; + else if (ret >= 0) + ret = -EIO; + + kfree(write_buf); + return ret; } static From 11f65ad111fa29de2d11929f773bf1e553d5b7c4 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 20 Aug 2018 15:47:57 -0700 Subject: [PATCH 125/242] dt-bindings: riscv,cpu-intc: Cleanups from a missed review I managed to miss one of Rob's code reviews on the mailing list . The patch has already been merged, so I'm submitting a fixup. Sorry! Fixes: b67bc7cb4088 ("dt-bindings: interrupt-controller: RISC-V local interrupt controller") Cc: Rob Herring Cc: Christoph Hellwig Cc: Karsten Merker Signed-off-by: Palmer Dabbelt --- .../interrupt-controller/riscv,cpu-intc.txt | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt index b0a8af51c388..265b223cd978 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt @@ -11,7 +11,7 @@ The RISC-V supervisor ISA manual specifies three interrupt sources that are attached to every HLIC: software interrupts, the timer interrupt, and external interrupts. Software interrupts are used to send IPIs between cores. The timer interrupt comes from an architecturally mandated real-time timer that is -controller via Supervisor Binary Interface (SBI) calls and CSR reads. External +controlled via Supervisor Binary Interface (SBI) calls and CSR reads. External interrupts connect all other device interrupts to the HLIC, which are routed via the platform-level interrupt controller (PLIC). @@ -25,7 +25,15 @@ in the system. Required properties: - compatible : "riscv,cpu-intc" -- #interrupt-cells : should be <1> +- #interrupt-cells : should be <1>. The interrupt sources are defined by the + RISC-V supervisor ISA manual, with only the following three interrupts being + defined for supervisor mode: + - Source 1 is the supervisor software interrupt, which can be sent by an SBI + call and is reserved for use by software. + - Source 5 is the supervisor timer interrupt, which can be configured by + SBI calls and implements a one-shot timer. + - Source 9 is the supervisor external interrupt, which chains to all other + device interrupts. - interrupt-controller : Identifies the node as an interrupt controller Furthermore, this interrupt-controller MUST be embedded inside the cpu @@ -38,7 +46,7 @@ An example device tree entry for a HLIC is show below. ... cpu1-intc: interrupt-controller { #interrupt-cells = <1>; - compatible = "riscv,cpu-intc", "sifive,fu540-c000-cpu-intc"; + compatible = "sifive,fu540-c000-cpu-intc", "riscv,cpu-intc"; interrupt-controller; }; }; From 0ce5671c4450527f90d2bfb31302f78580587983 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 24 Aug 2018 11:22:55 -0700 Subject: [PATCH 126/242] riscv: tlb: Provide definition of tlb_flush() before including tlb.h As of commit fd1102f0aade ("mm: mmu_notifier fix for tlb_end_vma"), asm-generic/tlb.h now calls tlb_flush() from a static inline function, so we need to make sure that it's declared before #including the asm-generic header in the arch header. Reported-by: Guenter Roeck Fixes: fd1102f0aade ("mm: mmu_notifier fix for tlb_end_vma") Signed-off-by: Will Deacon [groeck: Use forward declaration instead of moving inline function] Signed-off-by: Guenter Roeck Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlb.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index c229509288ea..439dc7072e05 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -14,6 +14,10 @@ #ifndef _ASM_RISCV_TLB_H #define _ASM_RISCV_TLB_H +struct mmu_gather; + +static void tlb_flush(struct mmu_gather *tlb); + #include static inline void tlb_flush(struct mmu_gather *tlb) From 47d80a68f10d3290204a12f7836a9a8190dfc327 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 28 Aug 2018 09:37:16 -0700 Subject: [PATCH 127/242] RISC-V: Use a less ugly workaround for unused variable warnings Thanks to Christoph Hellwig for pointing out a cleaner way to do this, as my approach was quite ugly. CC: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 568026ccf6e8..fb03a4482ad6 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -65,24 +65,11 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, uintptr_t, flags) { -#ifdef CONFIG_SMP - struct mm_struct *mm = current->mm; - bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; -#endif - /* Check the reserved flags. */ if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL)) return -EINVAL; - /* - * Without CONFIG_SMP flush_icache_mm is a just a flush_icache_all(), - * which generates unused variable warnings all over this function. - */ -#ifdef CONFIG_SMP - flush_icache_mm(mm, local); -#else - flush_icache_all(); -#endif + flush_icache_mm(current->mm, flags & SYS_RISCV_FLUSH_ICACHE_LOCAL); return 0; } From 076e2cedd6ea4786569c35f8725b4efdc1ecf2f2 Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Tue, 28 Aug 2018 07:56:08 -0700 Subject: [PATCH 128/242] xen: export device state to sysfs Export device state to sysfs to allow for easier get device state. Signed-off-by: Joe Jin Reviewed-by: Boris Ostrovsky Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky --- Documentation/ABI/stable/sysfs-bus-xen-backend | 9 +++++++++ drivers/xen/xenbus/xenbus_probe.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend index 3d5951c8bf5f..e8b60bd766f7 100644 --- a/Documentation/ABI/stable/sysfs-bus-xen-backend +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend @@ -73,3 +73,12 @@ KernelVersion: 3.0 Contact: Konrad Rzeszutek Wilk Description: Number of sectors written by the frontend. + +What: /sys/bus/xen-backend/devices/*/state +Date: August 2018 +KernelVersion: 4.19 +Contact: Joe Jin +Description: + The state of the device. One of: 'Unknown', + 'Initialising', 'Initialised', 'Connected', 'Closing', + 'Closed', 'Reconfiguring', 'Reconfigured'. diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index f2088838f690..5b471889d723 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -402,10 +402,19 @@ static ssize_t modalias_show(struct device *dev, } static DEVICE_ATTR_RO(modalias); +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", + xenbus_strstate(to_xenbus_device(dev)->state)); +} +static DEVICE_ATTR_RO(state); + static struct attribute *xenbus_dev_attrs[] = { &dev_attr_nodename.attr, &dev_attr_devtype.attr, &dev_attr_modalias.attr, + &dev_attr_state.attr, NULL, }; From 6d3c8ce012cefbdd73a3bba3c7f9a44af1c6a0bb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Aug 2018 17:10:46 +0100 Subject: [PATCH 129/242] x86/xen: remove redundant variable save_pud Variable save_pud is being assigned but is never used hence it is redundant and can be removed. Cleans up clang warning: variable 'save_pud' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- arch/x86/xen/mmu_pv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 9396b4d17064..ede298c183e9 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2059,7 +2059,6 @@ void __init xen_relocate_p2m(void) pud_t *pud; pgd_t *pgd; unsigned long *new_p2m; - int save_pud; size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT; @@ -2089,7 +2088,6 @@ void __init xen_relocate_p2m(void) pgd = __va(read_cr3_pa()); new_p2m = (unsigned long *)(2 * PGDIR_SIZE); - save_pud = n_pud; for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { pud = early_memremap(pud_phys, PAGE_SIZE); clear_page(pud); From 450b6b9b169382205f88858541a8b79830262ce7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 23 Aug 2018 18:06:54 -0500 Subject: [PATCH 130/242] clk: npcm7xx: fix memory allocation One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); Notice that, currently, there is a bug during the allocation: sizeof(npcm7xx_clk_data) should be sizeof(*npcm7xx_clk_data) Fix this bug by using struct_size() in kzalloc() This issue was detected with the help of Coccinelle. Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Avi Fishman Signed-off-by: Stephen Boyd --- drivers/clk/clk-npcm7xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk-npcm7xx.c b/drivers/clk/clk-npcm7xx.c index 740af90a9508..c5edf8f2fd19 100644 --- a/drivers/clk/clk-npcm7xx.c +++ b/drivers/clk/clk-npcm7xx.c @@ -558,8 +558,8 @@ static void __init npcm7xx_clk_init(struct device_node *clk_np) if (!clk_base) goto npcm7xx_init_error; - npcm7xx_clk_data = kzalloc(sizeof(*npcm7xx_clk_data->hws) * - NPCM7XX_NUM_CLOCKS + sizeof(npcm7xx_clk_data), GFP_KERNEL); + npcm7xx_clk_data = kzalloc(struct_size(npcm7xx_clk_data, hws, + NPCM7XX_NUM_CLOCKS), GFP_KERNEL); if (!npcm7xx_clk_data) goto npcm7xx_init_np_err; From 5b24109b0563d45094c470684c1f8cea1af269f8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Aug 2018 16:15:35 +0200 Subject: [PATCH 131/242] bpf: fix several offset tests in bpf_msg_pull_data While recently going over bpf_msg_pull_data(), I noticed three issues which are fixed in here: 1) When we attempt to find the first scatterlist element (sge) for the start offset, we add len to the offset before we check for start < offset + len, whereas it should come after when we iterate to the next sge to accumulate the offsets. For example, given a start offset of 12 with a sge length of 8 for the first sge in the list would lead us to determine this sge as the first sge thinking it covers first 16 bytes where start is located, whereas start sits in subsequent sges so we would end up pulling in the wrong data. 2) After figuring out the starting sge, we have a short-cut test in !msg->sg_copy[i] && bytes <= len. This checks whether it's not needed to make the page at the sge private where we can just exit by updating msg->data and msg->data_end. However, the length test is not fully correct. bytes <= len checks whether the requested bytes (end - start offsets) fit into the sge's length. The part that is missing is that start must not be sge length aligned. Meaning, the start offset into the sge needs to be accounted as well on top of the requested bytes as otherwise we can access the sge out of bounds. For example the sge could have length of 8, our requested bytes could have length of 8, but at a start offset of 4, so we also would need to pull in 4 bytes of the next sge, when we jump to the out label we do set msg->data to sg_virt(&sg[i]) + start - offset and msg->data_end to msg->data + bytes which would be oob. 3) The subsequent bytes < copy test for finding the last sge has the same issue as in point 2) but also it tests for less than rather than less or equal to. Meaning if the sge length is of 8 and requested bytes of 8 while having the start aligned with the sge, we would unnecessarily go and pull in the next sge as well to make it private. Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 7a2430945c71..ec4d67c0cf0c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2286,10 +2286,10 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) { unsigned int len = 0, offset = 0, copy = 0; + int bytes = end - start, bytes_sg_total; struct scatterlist *sg = msg->sg_data; int first_sg, last_sg, i, shift; unsigned char *p, *to, *from; - int bytes = end - start; struct page *page; if (unlikely(flags || end <= start)) @@ -2299,9 +2299,9 @@ BPF_CALL_4(bpf_msg_pull_data, i = msg->sg_start; do { len = sg[i].length; - offset += len; if (start < offset + len) break; + offset += len; i++; if (i == MAX_SKB_FRAGS) i = 0; @@ -2310,7 +2310,11 @@ BPF_CALL_4(bpf_msg_pull_data, if (unlikely(start >= offset + len)) return -EINVAL; - if (!msg->sg_copy[i] && bytes <= len) + /* The start may point into the sg element so we need to also + * account for the headroom. + */ + bytes_sg_total = start - offset + bytes; + if (!msg->sg_copy[i] && bytes_sg_total <= len) goto out; first_sg = i; @@ -2330,12 +2334,12 @@ BPF_CALL_4(bpf_msg_pull_data, i++; if (i == MAX_SKB_FRAGS) i = 0; - if (bytes < copy) + if (bytes_sg_total <= copy) break; } while (i != msg->sg_end); last_sg = i; - if (unlikely(copy < end - start)) + if (unlikely(bytes_sg_total > copy)) return -EINVAL; page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy)); From 1f631c3201fe5491808df143d8fcba81b3197ffd Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Wed, 29 Aug 2018 09:30:08 +0800 Subject: [PATCH 132/242] mac80211: mesh: fix HWMP sequence numbering to follow standard IEEE 802.11-2016 14.10.8.3 HWMP sequence numbering says: If it is a target mesh STA, it shall update its own HWMP SN to maximum (current HWMP SN, target HWMP SN in the PREQ element) + 1 immediately before it generates a PREP element in response to a PREQ element. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 35ad3983ae4b..daf9db3c8f24 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -572,6 +572,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, forward = false; reply = true; target_metric = 0; + + if (SN_GT(target_sn, ifmsh->sn)) + ifmsh->sn = target_sn; + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || time_before(jiffies, ifmsh->last_sn_update)) { From 166ac9d55b0ab70b644e429be1f217fe8393cbd7 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 29 Aug 2018 08:57:02 +0200 Subject: [PATCH 133/242] mac80211: avoid kernel panic when building AMSDU from non-linear SKB When building building AMSDU from non-linear SKB, we hit a kernel panic when trying to push the padding to the tail. Instead, put the padding at the head of the next subframe. This also fixes the A-MSDU subframes to not have the padding accounted in the length field and not have pad at all for the last subframe, both required by the spec. Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") Signed-off-by: Sara Sharon Reviewed-by: Lorenzo Bianconi Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fa1f1e63a264..667a73d6eb5c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3073,27 +3073,18 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta) } static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local, - struct sk_buff *skb, int headroom, - int *subframe_len) + struct sk_buff *skb, int headroom) { - int amsdu_len = *subframe_len + sizeof(struct ethhdr); - int padding = (4 - amsdu_len) & 3; - - if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) { + if (skb_headroom(skb) < headroom) { I802_DEBUG_INC(local->tx_expand_skb_head); - if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) { + if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { wiphy_debug(local->hw.wiphy, "failed to reallocate TX buffer\n"); return false; } } - if (padding) { - *subframe_len += padding; - skb_put_zero(skb, padding); - } - return true; } @@ -3117,8 +3108,7 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) return true; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr), - &subframe_len)) + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr))) return false; data = skb_push(skb, sizeof(*amsdu_hdr)); @@ -3184,7 +3174,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, void *data; bool ret = false; unsigned int orig_len; - int n = 1, nfrags; + int n = 1, nfrags, pad = 0; + u16 hdrlen; if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) return false; @@ -3235,8 +3226,19 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (max_frags && nfrags > max_frags) goto out; - if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2, - &subframe_len)) + /* + * Pad out the previous subframe to a multiple of 4 by adding the + * padding to the next one, that's being added. Note that head->len + * is the length of the full A-MSDU, but that works since each time + * we add a new subframe we pad out the previous one to a multiple + * of 4 and thus it no longer matters in the next round. + */ + hdrlen = fast_tx->hdr_len - sizeof(rfc1042_header); + if ((head->len - hdrlen) & 3) + pad = 4 - ((head->len - hdrlen) & 3); + + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + + 2 + pad)) goto out; ret = true; @@ -3248,6 +3250,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, memcpy(data, &len, 2); memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header)); + memset(skb_push(skb, pad), 0, pad); + head->len += skb->len; head->data_len += skb->len; *frag_tail = skb; From afce0cc9ad8aa510650e781a51e43c26e2a34cf6 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 14 Aug 2018 11:33:32 +0100 Subject: [PATCH 134/242] Documentation/arm64/sve: Couple of improvements and typos - Fix mismatch between SVE registers (Z) and FPSIMD register (V) - Don't prefix the path for [3] with Linux to stay consistent with [1] and [2]. Signed-off-by: Julien Grall Signed-off-by: Will Deacon --- Documentation/arm64/sve.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt index f128f736b4a5..7169a0ec41d8 100644 --- a/Documentation/arm64/sve.txt +++ b/Documentation/arm64/sve.txt @@ -200,7 +200,7 @@ prctl(PR_SVE_SET_VL, unsigned long arg) thread. * Changing the vector length causes all of P0..P15, FFR and all bits of - Z0..V31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become + Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become unspecified. Calling PR_SVE_SET_VL with vl equal to the thread's current vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC flag, does not constitute a change to the vector length for this purpose. @@ -500,7 +500,7 @@ References [2] arch/arm64/include/uapi/asm/ptrace.h AArch64 Linux ptrace ABI definitions -[3] linux/Documentation/arm64/cpu-feature-registers.txt +[3] Documentation/arm64/cpu-feature-registers.txt [4] ARM IHI0055C http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf From 1d8f574708a3fb6f18c85486d0c5217df893c0cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Aug 2018 15:08:29 +0100 Subject: [PATCH 135/242] arm/arm64: smccc-1.1: Make return values unsigned long An unfortunate consequence of having a strong typing for the input values to the SMC call is that it also affects the type of the return values, limiting r0 to 32 bits and r{1,2,3} to whatever was passed as an input. Let's turn everything into "unsigned long", which satisfies the requirements of both architectures, and allows for the full range of return values. Reported-by: Julien Grall Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index ca1d2cc2cdfa..5a91ff33720b 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -199,31 +199,31 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define __declare_arg_0(a0, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ + register unsigned long r0 asm("r0") = (u32)a0; \ register unsigned long r1 asm("r1"); \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3") #define __declare_arg_1(a0, a1, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ - register typeof(a1) r1 asm("r1") = a1; \ + register unsigned long r0 asm("r0") = (u32)a0; \ + register unsigned long r1 asm("r1") = a1; \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3") #define __declare_arg_2(a0, a1, a2, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ - register typeof(a1) r1 asm("r1") = a1; \ - register typeof(a2) r2 asm("r2") = a2; \ + register unsigned long r0 asm("r0") = (u32)a0; \ + register unsigned long r1 asm("r1") = a1; \ + register unsigned long r2 asm("r2") = a2; \ register unsigned long r3 asm("r3") #define __declare_arg_3(a0, a1, a2, a3, res) \ struct arm_smccc_res *___res = res; \ - register u32 r0 asm("r0") = a0; \ - register typeof(a1) r1 asm("r1") = a1; \ - register typeof(a2) r2 asm("r2") = a2; \ - register typeof(a3) r3 asm("r3") = a3 + register unsigned long r0 asm("r0") = (u32)a0; \ + register unsigned long r1 asm("r1") = a1; \ + register unsigned long r2 asm("r2") = a2; \ + register unsigned long r3 asm("r3") = a3 #define __declare_arg_4(a0, a1, a2, a3, a4, res) \ __declare_arg_3(a0, a1, a2, a3, res); \ From 36156f9241cb0f9e37d998052873ca7501ad4b36 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Aug 2018 10:21:45 +0200 Subject: [PATCH 136/242] of: add helper to lookup compatible child node Add of_get_compatible_child() helper that can be used to lookup compatible child nodes. Several drivers currently use of_find_compatible_node() to lookup child nodes while failing to notice that the of_find_ functions search the entire tree depth-first (from a given start node) and therefore can match unrelated nodes. The fact that these functions also drop a reference to the node they start searching from (e.g. the parent node) is typically also overlooked, something which can lead to use-after-free bugs. Signed-off-by: Johan Hovold Signed-off-by: Rob Herring --- drivers/of/base.c | 25 +++++++++++++++++++++++++ include/linux/of.h | 8 ++++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 466e3c8582f0..bc420d2aa5f5 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -719,6 +719,31 @@ struct device_node *of_get_next_available_child(const struct device_node *node, } EXPORT_SYMBOL(of_get_next_available_child); +/** + * of_get_compatible_child - Find compatible child node + * @parent: parent node + * @compatible: compatible string + * + * Lookup child node whose compatible property contains the given compatible + * string. + * + * Returns a node pointer with refcount incremented, use of_node_put() on it + * when done; or NULL if not found. + */ +struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + struct device_node *child; + + for_each_child_of_node(parent, child) { + if (of_device_is_compatible(child, compatible)) + break; + } + + return child; +} +EXPORT_SYMBOL(of_get_compatible_child); + /** * of_get_child_by_name - Find the child node by name for a given parent * @node: parent node diff --git a/include/linux/of.h b/include/linux/of.h index 4d25e4f952d9..b99a1a8c2952 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -290,6 +290,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node, extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); @@ -632,6 +634,12 @@ static inline bool of_have_populated_dt(void) return false; } +static inline struct device_node *of_get_compatible_child(const struct device_node *parent, + const char *compatible) +{ + return NULL; +} + static inline struct device_node *of_get_child_by_name( const struct device_node *node, const char *name) From 8f3fafc9c2f0ece10832c25f7ffcb07c97a32ad4 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 26 Apr 2018 11:51:08 -0600 Subject: [PATCH 137/242] cdrom: Fix info leak/OOB read in cdrom_ioctl_drive_status Like d88b6d04: "cdrom: information leak in cdrom_ioctl_media_changed()" There is another cast from unsigned long to int which causes a bounds check to fail with specially crafted input. The value is then used as an index in the slot array in cdrom_slot_status(). Signed-off-by: Scott Bauer Signed-off-by: Scott Bauer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 113fc6edb2b0..a5d5a96479bf 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2546,7 +2546,7 @@ static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, if (!CDROM_CAN(CDC_SELECT_DISC) || (arg == CDSL_CURRENT || arg == CDSL_NONE)) return cdi->ops->drive_status(cdi, CDSL_CURRENT); - if (((int)arg >= cdi->capacity)) + if (arg >= cdi->capacity) return -EINVAL; return cdrom_slot_status(cdi, arg); } From 0e06b227c5221dd51b5569de93f3b9f532be4a32 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Aug 2018 16:50:34 +0200 Subject: [PATCH 138/242] bpf: fix msg->data/data_end after sg shift repair in bpf_msg_pull_data In the current code, msg->data is set as sg_virt(&sg[i]) + start - offset and msg->data_end relative to it as msg->data + bytes. Using iterator i to point to the updated starting scatterlist element holds true for some cases, however not for all where we'd end up pointing out of bounds. It is /correct/ for these ones: 1) When first finding the starting scatterlist element (sge) where we find that the page is already privately owned by the msg and where the requested bytes and headroom fit into the sge's length. However, it's /incorrect/ for the following ones: 2) After we made the requested area private and updated the newly allocated page into first_sg slot of the scatterlist ring; when we find that no shift repair of the ring is needed where we bail out updating msg->data and msg->data_end. At that point i will point to last_sg, which in this case is the next elem of first_sg in the ring. The sge at that point might as well be invalid (e.g. i == msg->sg_end), which we use for setting the range of sg_virt(&sg[i]). The correct one would have been first_sg. 3) Similar as in 2) but when we find that a shift repair of the ring is needed. In this case we fix up all sges and stop once we've reached the end. In this case i will point to will point to the new msg->sg_end, and the sge at that point will be invalid. Again here the requested range sits in first_sg. Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index ec4d67c0cf0c..b9225c55926a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2310,6 +2310,7 @@ BPF_CALL_4(bpf_msg_pull_data, if (unlikely(start >= offset + len)) return -EINVAL; + first_sg = i; /* The start may point into the sg element so we need to also * account for the headroom. */ @@ -2317,8 +2318,6 @@ BPF_CALL_4(bpf_msg_pull_data, if (!msg->sg_copy[i] && bytes_sg_total <= len) goto out; - first_sg = i; - /* At this point we need to linearize multiple scatterlist * elements or a single shared page. Either way we need to * copy into a linear buffer exclusively owned by BPF. Then @@ -2400,7 +2399,7 @@ BPF_CALL_4(bpf_msg_pull_data, if (msg->sg_end < 0) msg->sg_end += MAX_SKB_FRAGS; out: - msg->data = sg_virt(&sg[i]) + start - offset; + msg->data = sg_virt(&sg[first_sg]) + start - offset; msg->data_end = msg->data + bytes; return 0; From 2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Aug 2018 16:50:35 +0200 Subject: [PATCH 139/242] bpf: fix shift upon scatterlist ring wrap-around in bpf_msg_pull_data If first_sg and last_sg wraps around in the scatterlist ring, then we need to account for that in the shift as well. E.g. crafting such msgs where this is the case leads to a hang as shift becomes negative. E.g. consider the following scenario: first_sg := 14 |=> shift := -12 msg->sg_start := 10 last_sg := 3 | msg->sg_end := 5 round 1: i := 15, move_from := 3, sg[15] := sg[ 3] round 2: i := 0, move_from := -12, sg[ 0] := sg[-12] round 3: i := 1, move_from := -11, sg[ 1] := sg[-11] round 4: i := 2, move_from := -10, sg[ 2] := sg[-10] [...] round 13: i := 11, move_from := -1, sg[ 2] := sg[ -1] round 14: i := 12, move_from := 0, sg[ 2] := sg[ 0] round 15: i := 13, move_from := 1, sg[ 2] := sg[ 1] round 16: i := 14, move_from := 2, sg[ 2] := sg[ 2] round 17: i := 15, move_from := 3, sg[ 2] := sg[ 3] [...] This means we will loop forever and never hit the msg->sg_end condition to break out of the loop. When we see that the ring wraps around, then the shift should be MAX_SKB_FRAGS - first_sg + last_sg - 1. Meaning, the remainder slots from the tail of the ring and the head until last_sg combined. Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index b9225c55926a..43ba5f8c38ca 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2370,7 +2370,10 @@ BPF_CALL_4(bpf_msg_pull_data, * had a single entry though we can just replace it and * be done. Otherwise walk the ring and shift the entries. */ - shift = last_sg - first_sg - 1; + WARN_ON_ONCE(last_sg == first_sg); + shift = last_sg > first_sg ? + last_sg - first_sg - 1 : + MAX_SKB_FRAGS - first_sg + last_sg - 1; if (!shift) goto out; From a8cf76a9023bc6709b1361d06bb2fae5227b9d68 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Aug 2018 16:50:36 +0200 Subject: [PATCH 140/242] bpf: fix sg shift repair start offset in bpf_msg_pull_data When we perform the sg shift repair for the scatterlist ring, we currently start out at i = first_sg + 1. However, this is not correct since the first_sg could point to the sge sitting at slot MAX_SKB_FRAGS - 1, and a subsequent i = MAX_SKB_FRAGS will access the scatterlist ring (sg) out of bounds. Add the sk_msg_iter_var() helper for iterating through the ring, and apply the same rule for advancing to the next ring element as we do elsewhere. Later work will use this helper also in other places. Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 43ba5f8c38ca..2c7801f6737a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2282,6 +2282,13 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .arg2_type = ARG_ANYTHING, }; +#define sk_msg_iter_var(var) \ + do { \ + var++; \ + if (var == MAX_SKB_FRAGS) \ + var = 0; \ + } while (0) + BPF_CALL_4(bpf_msg_pull_data, struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) { @@ -2302,9 +2309,7 @@ BPF_CALL_4(bpf_msg_pull_data, if (start < offset + len) break; offset += len; - i++; - if (i == MAX_SKB_FRAGS) - i = 0; + sk_msg_iter_var(i); } while (i != msg->sg_end); if (unlikely(start >= offset + len)) @@ -2330,9 +2335,7 @@ BPF_CALL_4(bpf_msg_pull_data, */ do { copy += sg[i].length; - i++; - if (i == MAX_SKB_FRAGS) - i = 0; + sk_msg_iter_var(i); if (bytes_sg_total <= copy) break; } while (i != msg->sg_end); @@ -2358,9 +2361,7 @@ BPF_CALL_4(bpf_msg_pull_data, sg[i].length = 0; put_page(sg_page(&sg[i])); - i++; - if (i == MAX_SKB_FRAGS) - i = 0; + sk_msg_iter_var(i); } while (i != last_sg); sg[first_sg].length = copy; @@ -2377,7 +2378,8 @@ BPF_CALL_4(bpf_msg_pull_data, if (!shift) goto out; - i = first_sg + 1; + i = first_sg; + sk_msg_iter_var(i); do { int move_from; @@ -2394,9 +2396,7 @@ BPF_CALL_4(bpf_msg_pull_data, sg[move_from].page_link = 0; sg[move_from].offset = 0; - i++; - if (i == MAX_SKB_FRAGS) - i = 0; + sk_msg_iter_var(i); } while (1); msg->sg_end -= shift; if (msg->sg_end < 0) From 74081c9f16a213f8f2681c175dc6ad7d17ad16ba Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Tue, 14 Aug 2018 13:33:48 +0100 Subject: [PATCH 141/242] dt-bindings: watchdog: renesas-wdt: Document r8a774a1 support RZ/G2M (R8A774A1) watchdog implementation is compatible with R-Car Gen3, therefore add relevant documentation. Signed-off-by: Fabrizio Castro Reviewed-by: Biju Das Reviewed-by: Rob Herring Reviewed-by: Simon Horman Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/renesas-wdt.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt index 5d47a262474c..9407212a85a8 100644 --- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt @@ -7,6 +7,7 @@ Required properties: Examples with soctypes are: - "renesas,r8a7743-wdt" (RZ/G1M) - "renesas,r8a7745-wdt" (RZ/G1E) + - "renesas,r8a774a1-wdt" (RZ/G2M) - "renesas,r8a7790-wdt" (R-Car H2) - "renesas,r8a7791-wdt" (R-Car M2-W) - "renesas,r8a7792-wdt" (R-Car V2H) @@ -21,8 +22,8 @@ Required properties: - "renesas,r7s72100-wdt" (RZ/A1) The generic compatible string must be: - "renesas,rza-wdt" for RZ/A - - "renesas,rcar-gen2-wdt" for R-Car Gen2 and RZ/G - - "renesas,rcar-gen3-wdt" for R-Car Gen3 + - "renesas,rcar-gen2-wdt" for R-Car Gen2 and RZ/G1 + - "renesas,rcar-gen3-wdt" for R-Car Gen3 and RZ/G2 - reg : Should contain WDT registers location and length - clocks : the clock feeding the watchdog timer. From 1dbd150d04f11a6e8d03c80132167e8d3391f8ce Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Aug 2018 15:27:08 +0200 Subject: [PATCH 142/242] ARM: defconfig: Update the ARM Versatile defconfig This updates the ARM Versatile defconfig to the latest Kconfig structural changes and adds the DUMB VGA bridge driver so that VGA works out of the box, e.g. with QEMU. Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/configs/versatile_defconfig | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index df68dc4056e5..5282324c7cef 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -5,19 +5,19 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y # CONFIG_ARCH_MULTI_V7 is not set CONFIG_ARCH_VERSATILE=y CONFIG_AEABI=y CONFIG_OABI_COMPAT=y -CONFIG_CMA=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="root=1f03 mem=32M" CONFIG_FPE_NWFPE=y CONFIG_VFP=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_CMA=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -59,6 +59,7 @@ CONFIG_GPIO_PL061=y CONFIG_DRM=y CONFIG_DRM_PANEL_ARM_VERSATILE=y CONFIG_DRM_PANEL_SIMPLE=y +CONFIG_DRM_DUMB_VGA_DAC=y CONFIG_DRM_PL111=y CONFIG_FB_MODE_HELPERS=y CONFIG_BACKLIGHT_LCD_SUPPORT=y @@ -89,9 +90,10 @@ CONFIG_NFSD=y CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m +CONFIG_FONTS=y +CONFIG_FONT_ACORN_8x8=y +CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y -CONFIG_FONTS=y -CONFIG_FONT_ACORN_8x8=y From 67e6ddb5be440a92b82e87ca0ab8f973ae31b12c Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 27 Aug 2018 19:46:50 -0500 Subject: [PATCH 143/242] arm64: defconfig: Enable TI's AM6 SoC platform Enable K3 SoC platform for TI's AM6 SoC. Signed-off-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Olof Johansson --- arch/arm64/configs/defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f67e8d5e93ad..db8d364f8476 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_ARCH_BCM_IPROC=y CONFIG_ARCH_BERLIN=y CONFIG_ARCH_BRCMSTB=y CONFIG_ARCH_EXYNOS=y +CONFIG_ARCH_K3=y CONFIG_ARCH_LAYERSCAPE=y CONFIG_ARCH_LG1K=y CONFIG_ARCH_HISI=y @@ -605,6 +606,8 @@ CONFIG_ARCH_TEGRA_132_SOC=y CONFIG_ARCH_TEGRA_210_SOC=y CONFIG_ARCH_TEGRA_186_SOC=y CONFIG_ARCH_TEGRA_194_SOC=y +CONFIG_ARCH_K3_AM6_SOC=y +CONFIG_SOC_TI=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_EXTCON_USB_GPIO=y CONFIG_EXTCON_USBC_CROS_EC=y From f013027e266553effa3e9d9d62236ae5ee3b25e7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Aug 2018 08:34:46 +0100 Subject: [PATCH 144/242] drm/i915: Stop holding a ref to the ppgtt from each vma The context owns both the ppgtt and the vma within it, and our activity tracking on the context ensures that we do not release active ppgtt. As the context fulfils our obligations for active memory tracking, we can relinquish the reference from the vma. This fixes a silly transient refleak from closed vma being kept alive until the entire system was idle, keeping all vm alive as well. Reported-by: Paulo Zanoni Testcase: igt/gem_ctx_create/files Fixes: 3365e2268b6b ("drm/i915: Lazily unbind vma on close") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Paulo Zanoni Reviewed-by: Mika Kuoppala Tested-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180816073448.19396-1-chris@chris-wilson.co.uk (cherry picked from commit a4417b7b419a68540ad7945ac4efbb39d19afa63) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 11d834f94220..98358b4b36de 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -199,7 +199,6 @@ vma_create(struct drm_i915_gem_object *obj, vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { - i915_ppgtt_get(i915_vm_to_ppgtt(vm)); list_add_tail(&vma->obj_link, &obj->vma_list); } @@ -807,9 +806,6 @@ static void __i915_vma_destroy(struct i915_vma *vma) if (vma->obj) rb_erase(&vma->obj_node, &vma->obj->vma_tree); - if (!i915_vma_is_ggtt(vma)) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); kfree(iter); From 299c2a904b1e8d5096d4813df6371357d97a6cd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20Sch=C3=B6n?= Date: Fri, 17 Aug 2018 22:07:28 +0200 Subject: [PATCH 145/242] drm/i915: Increase LSPCON timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 100 ms is not enough time for the LSPCON adapter on Intel NUC devices to settle. This causes dropped display modes at boot or screen reconfiguration. Empirical testing can reproduce the error up to a timeout of 190 ms. Basic boot and stress testing at 200 ms has not (yet) failed. Increase timeout to 400 ms to get some margin of error. Changes from v1: The initial suggestion of 1000 ms was lowered due to concerns about delaying valid timeout cases. Update patch metadata. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107503 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1570392 Fixes: 357c0ae9198a ("drm/i915/lspcon: Wait for expected LSPCON mode to settle") Cc: Shashank Sharma Cc: Imre Deak Cc: Jani Nikula Cc: # v4.11+ Reviewed-by: Rodrigo Vivi Reviewed-by: Shashank Sharma Signed-off-by: Fredrik Schön Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180817200728.8154-1-fredrik.schon@gmail.com (cherry picked from commit 59f1c8ab30d6f9042562949f42cbd3f3cf69de94) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_lspcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 5dae16ccd9f1..3e085c5f2b81 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -74,7 +74,7 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, DRM_DEBUG_KMS("Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 100); + wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 400); if (current_mode != mode) DRM_ERROR("LSPCON mode hasn't settled\n"); From 80ab316901bc4ae6dd0b5903dbe22766307eac9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Aug 2018 11:02:41 +0100 Subject: [PATCH 146/242] drm/i915/audio: Hook up component bindings even if displays are disabled If the display has been disabled by modparam, we still want to connect together the HW bits and bobs with the associated drivers so that we can continue to manage their runtime power gating. Fixes: 108109444ff6 ("drm/i915: Check num_pipes before initializing audio component") Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Takashi Iwai Cc: Jani Nikula Cc: Elaine Wang Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20180817100241.4628-1-chris@chris-wilson.co.uk (cherry picked from commit 35a5fd9ebfa93758ca579e30f337b6c9126d995b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_audio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index b725835b47ef..769f3f586661 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -962,9 +962,6 @@ void i915_audio_component_init(struct drm_i915_private *dev_priv) { int ret; - if (INTEL_INFO(dev_priv)->num_pipes == 0) - return; - ret = component_add(dev_priv->drm.dev, &i915_audio_component_bind_ops); if (ret < 0) { DRM_ERROR("failed to add audio component (%d)\n", ret); From ff69279a44e9ba876466b7d3ab84d6dbd31cac92 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Aug 2018 08:47:53 +0200 Subject: [PATCH 147/242] powerpc: disable support for relative ksymtab references The newly added code that emits ksymtab entries as pairs of 32-bit relative references interacts poorly with the way powerpc lays out its address space: when a module exports a per-CPU variable, the primary module region covering the ksymtab entry -and thus the 32-bit relative reference- is too far away from the actual per-CPU variable's base address (to which the per-CPU offsets are applied to obtain the respective address of each CPU's copy), resulting in corruption when the module loader attempts to resolve symbol references of modules that are loaded on top and link to the exported per-CPU symbol. So let's disable this feature on powerpc. Even though it implements CONFIG_RELOCATABLE, it does not implement CONFIG_RANDOMIZE_BASE and so KASLR kernels (which are the main target of the feature) do not exist on powerpc anyway. Reported-by: Andreas Schwab Suggested-by: Nicholas Piggin Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index db0b6eebbfa5..a80669209155 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -177,7 +177,6 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT - select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 From 9f2895461439fda2801a7906fb4c5fb3dbb37a0a Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 23 Aug 2018 19:49:54 +0300 Subject: [PATCH 148/242] vti6: remove !skb->ignore_df check from vti6_xmit() Before the commit d6990976af7c ("vti6: fix PMTU caching and reporting on xmit") '!skb->ignore_df' check was always true because the function skb_scrub_packet() was called before it, resetting ignore_df to zero. In the commit, skb_scrub_packet() was moved below, and now this check can be false for the packet, e.g. when sending it in the two fragments, this prevents successful PMTU updates in such case. The next attempts to send the packet lead to the same tx error. Moreover, vti6 initial MTU value relies on PMTU adjustments. This issue can be reproduced with the following LTP test script: udp_ipsec_vti.sh -6 -p ah -m tunnel -s 2000 Fixes: ccd740cbc6e0 ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Alexey Kodanev Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5095367c7204..eeaf7455d51e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (!skb->ignore_df && skb->len > mtu) { + if (skb->len > mtu) { skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { From bd583fe30427500a2d0abe25724025b1cb5e2636 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 23 Aug 2018 16:19:44 -0700 Subject: [PATCH 149/242] tipc: fix a missing rhashtable_walk_exit() rhashtable_walk_exit() must be paired with rhashtable_walk_enter(). Fixes: 40f9f4397060 ("tipc: Fix tipc_sk_reinit race conditions") Cc: Herbert Xu Cc: Ying Xue Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c1e93c9515bc..c9a50b62c738 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2672,6 +2672,8 @@ void tipc_sk_reinit(struct net *net) rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); + + rhashtable_walk_exit(&iter); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) From e5133f2f1261f8ab412e7fc5e3694c9f84328f89 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 24 Aug 2018 11:04:40 +0200 Subject: [PATCH 150/242] Revert "net: stmmac: Do not keep rearming the coalesce timer in stmmac_xmit" This reverts commit 4ae0169fd1b3c792b66be58995b7e6b629919ecf. This change in the handling of the coalesce timer is causing regression on (at least) amlogic platforms. Network will break down very quickly (a few seconds) after starting a download. This can easily be reproduced using iperf3 for example. The problem has been reported on the S805, S905, S912 and A113 SoCs (Realtek and Micrel PHYs) and it is likely impacting all Amlogics platforms using Gbit ethernet No problem was seen with the platform using 10/100 only PHYs (GXL internal) Reverting change brings things back to normal and allows to use network again until we better understand the problem with the coalesce timer. Cc: Jose Abreu Cc: Joao Pinto Cc: Vitor Soares Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Cc: Corentin Labbe Signed-off-by: Jerome Brunet Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 76649adf8fb0..c0a855b7ab3b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -112,7 +112,6 @@ struct stmmac_priv { u32 tx_count_frames; u32 tx_coal_frames; u32 tx_coal_timer; - bool tx_timer_armed; int tx_coalesce; int hwts_tx_en; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ff1ffb46198a..9f458bb16f2a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3147,16 +3147,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * element in case of no SG. */ priv->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > priv->tx_count_frames) && - !priv->tx_timer_armed) { + if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { mod_timer(&priv->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); - priv->tx_timer_armed = true; } else { priv->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; - priv->tx_timer_armed = false; } skb_tx_timestamp(skb); From 9a07efa9aea2f4a59f35da0785a4e6a6b5a96192 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 24 Aug 2018 12:28:06 -0700 Subject: [PATCH 151/242] tipc: switch to rhashtable iterator syzbot reported a use-after-free in tipc_group_fill_sock_diag(), where tipc_group_fill_sock_diag() still reads tsk->group meanwhile tipc_group_delete() just deletes it in tipc_release(). tipc_nl_sk_walk() aims to lock this sock when walking each sock in the hash table to close race conditions with sock changes like this one, by acquiring tsk->sk.sk_lock.slock spinlock, unfortunately this doesn't work at all. All non-BH call path should take lock_sock() instead to make it work. tipc_nl_sk_walk() brutally iterates with raw rht_for_each_entry_rcu() where RCU read lock is required, this is the reason why lock_sock() can't be taken on this path. This could be resolved by switching to rhashtable iterator API's, where taking a sleepable lock is possible. Also, the iterator API's are friendly for restartable calls like diag dump, the last position is remembered behind the scence, all we need to do here is saving the iterator into cb->args[]. I tested this with parallel tipc diag dump and thousands of tipc socket creation and release, no crash or memory leak. Reported-by: syzbot+b9c8f3ab2994b7cd1625@syzkaller.appspotmail.com Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/tipc/diag.c | 2 ++ net/tipc/netlink.c | 2 ++ net/tipc/socket.c | 80 ++++++++++++++++++++++++++++++---------------- net/tipc/socket.h | 2 ++ 4 files changed, 58 insertions(+), 28 deletions(-) diff --git a/net/tipc/diag.c b/net/tipc/diag.c index aaabb0b776dd..73137f4aeb68 100644 --- a/net/tipc/diag.c +++ b/net/tipc/diag.c @@ -84,7 +84,9 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb, if (h->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = tipc_dump_start, .dump = tipc_diag_dump, + .done = tipc_dump_done, }; netlink_dump_start(net->diag_nlsk, skb, h, &c); return 0; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6ff2254088f6..99ee419210ba 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -167,7 +167,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_SOCK_GET, + .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, + .done = tipc_dump_done, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c9a50b62c738..ab7a2a7178f7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3229,45 +3229,69 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, struct netlink_callback *cb, struct tipc_sock *tsk)) { - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = tipc_net(net); - const struct bucket_table *tbl; - u32 prev_portid = cb->args[1]; - u32 tbl_id = cb->args[0]; - struct rhash_head *pos; + struct rhashtable_iter *iter = (void *)cb->args[0]; struct tipc_sock *tsk; int err; - rcu_read_lock(); - tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (; tbl_id < tbl->size; tbl_id++) { - rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { - spin_lock_bh(&tsk->sk.sk_lock.slock); - if (prev_portid && prev_portid != tsk->portid) { - spin_unlock_bh(&tsk->sk.sk_lock.slock); + rhashtable_walk_start(iter); + while ((tsk = rhashtable_walk_next(iter)) != NULL) { + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + if (err == -EAGAIN) { + err = 0; continue; } - - err = skb_handler(skb, cb, tsk); - if (err) { - prev_portid = tsk->portid; - spin_unlock_bh(&tsk->sk.sk_lock.slock); - goto out; - } - - prev_portid = 0; - spin_unlock_bh(&tsk->sk.sk_lock.slock); + break; } - } -out: - rcu_read_unlock(); - cb->args[0] = tbl_id; - cb->args[1] = prev_portid; + sock_hold(&tsk->sk); + rhashtable_walk_stop(iter); + lock_sock(&tsk->sk); + err = skb_handler(skb, cb, tsk); + if (err) { + release_sock(&tsk->sk); + sock_put(&tsk->sk); + goto out; + } + release_sock(&tsk->sk); + rhashtable_walk_start(iter); + sock_put(&tsk->sk); + } + rhashtable_walk_stop(iter); +out: return skb->len; } EXPORT_SYMBOL(tipc_nl_sk_walk); +int tipc_dump_start(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (void *)cb->args[0]; + struct net *net = sock_net(cb->skb->sk); + struct tipc_net *tn = tipc_net(net); + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&tn->sk_rht, iter); + return 0; +} +EXPORT_SYMBOL(tipc_dump_start); + +int tipc_dump_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *hti = (void *)cb->args[0]; + + rhashtable_walk_exit(hti); + kfree(hti); + return 0; +} +EXPORT_SYMBOL(tipc_dump_done); + int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk, u32 sk_filter_state, u64 (*tipc_diag_gen_cookie)(struct sock *sk)) diff --git a/net/tipc/socket.h b/net/tipc/socket.h index aff9b2ae5a1f..d43032e26532 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -68,4 +68,6 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, int (*skb_handler)(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_sock *tsk)); +int tipc_dump_start(struct netlink_callback *cb); +int tipc_dump_done(struct netlink_callback *cb); #endif From 05212ba8132b42047ab5d63d759c6f9c28e7eab5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 26 Aug 2018 17:03:09 +0300 Subject: [PATCH 152/242] r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices I have two Ethernet adapters: r8169 0000:03:01.0 eth0: RTL8169sb/8110sb, 00:14:d1:14:2d:49, XID 10000000, IRQ 18 r8169 0000:01:00.0 eth0: RTL8168e/8111e, 64:66:b3:11:14:5d, XID 2c200000, IRQ 30 And after upgrading from linux 4.15 [1] to linux 4.18+ [2] RTL8169sb failed to receive any packets. tcpdump shows a lot of checksum mismatch. [1]: a0f79386a4968b4925da6db2d1daffd0605a4402 [2]: 0519359784328bfa92bf0931bf0cff3b58c16932 (4.19 merge window opened) I started bisecting and the found that [3] breaks it. According to [4]: "For 8110S, 8110SB, and 8110SC series, the initial value of RxConfig needs to be set after the tx/rx is enabled." So I moved rtl_init_rxcfg() after enabling tx/rs and now my adapter works (RTL8168e works too). [3]: 3559d81e76bfe3803e89f2e04cf6ef7ab4f3aace [4]: e542a2269f232d61270ceddd42b73a4348dee2bb ("r8169: adjust the RxConfig settings.") Also drop "rx" from rtl_set_rx_tx_config_registers(), since it does nothing with it already. Fixes: 3559d81e76bfe3803e89f2e04cf6ef7ab4f3aace ("r8169: simplify rtl_hw_start_8169") Cc: Heiner Kallweit Cc: David S. Miller Cc: netdev@vger.kernel.org Cc: Realtek linux nic maintainers Signed-off-by: Azat Khuzhin Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 0efa977c422d..ac306797590e 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4522,7 +4522,7 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) rtl_hw_reset(tp); } -static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp) +static void rtl_set_tx_config_registers(struct rtl8169_private *tp) { /* Set DMA burst size and Interframe Gap Time */ RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) | @@ -4633,12 +4633,14 @@ static void rtl_hw_start(struct rtl8169_private *tp) rtl_set_rx_max_size(tp); rtl_set_rx_tx_desc_registers(tp); - rtl_set_rx_tx_config_registers(tp); + rtl_set_tx_config_registers(tp); RTL_W8(tp, Cfg9346, Cfg9346_Lock); /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ RTL_R8(tp, IntrMask); RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); + rtl_init_rxcfg(tp); + rtl_set_rx_mode(tp->dev); /* no early-rx interrupts */ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); From 31fabbee8f5c658c3fa1603c66e9e4f51ea8c2c6 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Mon, 27 Aug 2018 09:59:29 +0800 Subject: [PATCH 153/242] net: hns: add the code for cleaning pkt in chip If there are packets in hardware when changing the speed or duplex, it may cause hardware hang up. This patch adds the code for waiting chip to clean the all pkts(TX & RX) in chip when the driver uses the function named "adjust link". This patch cleans the pkts as follows: 1) close rx of chip, close tx of protocol stack. 2) wait rcb, ppe, mac to clean. 3) adjust link 4) open rx of chip, open tx of protocol stack. Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 + .../net/ethernet/hisilicon/hns/hns_ae_adapt.c | 67 ++++++++++++++++++- .../ethernet/hisilicon/hns/hns_dsaf_gmac.c | 36 ++++++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 44 ++++++++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 8 +++ .../ethernet/hisilicon/hns/hns_dsaf_main.c | 29 ++++++++ .../ethernet/hisilicon/hns/hns_dsaf_main.h | 3 + .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 23 +++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 1 + .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 23 +++++++ .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.h | 1 + .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 + drivers/net/ethernet/hisilicon/hns/hns_enet.c | 21 +++++- 13 files changed, 255 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index cad52bd331f7..08a750fb60c4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -486,6 +486,8 @@ struct hnae_ae_ops { u8 *auto_neg, u16 *speed, u8 *duplex); void (*toggle_ring_irq)(struct hnae_ring *ring, u32 val); void (*adjust_link)(struct hnae_handle *handle, int speed, int duplex); + bool (*need_adjust_link)(struct hnae_handle *handle, + int speed, int duplex); int (*set_loopback)(struct hnae_handle *handle, enum hnae_loop loop_mode, int en); void (*get_ring_bdnum_limit)(struct hnae_queue *queue, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index e6aad30e7e69..b52029e26d15 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -155,6 +155,41 @@ static void hns_ae_put_handle(struct hnae_handle *handle) hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0; } +static int hns_ae_wait_flow_down(struct hnae_handle *handle) +{ + struct dsaf_device *dsaf_dev; + struct hns_ppe_cb *ppe_cb; + struct hnae_vf_cb *vf_cb; + int ret; + int i; + + for (i = 0; i < handle->q_num; i++) { + ret = hns_rcb_wait_tx_ring_clean(handle->qs[i]); + if (ret) + return ret; + } + + ppe_cb = hns_get_ppe_cb(handle); + ret = hns_ppe_wait_tx_fifo_clean(ppe_cb); + if (ret) + return ret; + + dsaf_dev = hns_ae_get_dsaf_dev(handle->dev); + if (!dsaf_dev) + return -EINVAL; + ret = hns_dsaf_wait_pkt_clean(dsaf_dev, handle->dport_id); + if (ret) + return ret; + + vf_cb = hns_ae_get_vf_cb(handle); + ret = hns_mac_wait_fifo_clean(vf_cb->mac_cb); + if (ret) + return ret; + + mdelay(10); + return 0; +} + static void hns_ae_ring_enable_all(struct hnae_handle *handle, int val) { int q_num = handle->q_num; @@ -399,12 +434,41 @@ static int hns_ae_get_mac_info(struct hnae_handle *handle, return hns_mac_get_port_info(mac_cb, auto_neg, speed, duplex); } +static bool hns_ae_need_adjust_link(struct hnae_handle *handle, int speed, + int duplex) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + return hns_mac_need_adjust_link(mac_cb, speed, duplex); +} + static void hns_ae_adjust_link(struct hnae_handle *handle, int speed, int duplex) { struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); - hns_mac_adjust_link(mac_cb, speed, duplex); + switch (mac_cb->dsaf_dev->dsaf_ver) { + case AE_VERSION_1: + hns_mac_adjust_link(mac_cb, speed, duplex); + break; + + case AE_VERSION_2: + /* chip need to clear all pkt inside */ + hns_mac_disable(mac_cb, MAC_COMM_MODE_RX); + if (hns_ae_wait_flow_down(handle)) { + hns_mac_enable(mac_cb, MAC_COMM_MODE_RX); + break; + } + + hns_mac_adjust_link(mac_cb, speed, duplex); + hns_mac_enable(mac_cb, MAC_COMM_MODE_RX); + break; + + default: + break; + } + + return; } static void hns_ae_get_ring_bdnum_limit(struct hnae_queue *queue, @@ -902,6 +966,7 @@ static struct hnae_ae_ops hns_dsaf_ops = { .get_status = hns_ae_get_link_status, .get_info = hns_ae_get_mac_info, .adjust_link = hns_ae_adjust_link, + .need_adjust_link = hns_ae_need_adjust_link, .set_loopback = hns_ae_config_loopback, .get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit, .get_pauseparam = hns_ae_get_pauseparam, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 5488c6e89f21..09e4061d1fa6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -257,6 +257,16 @@ static void hns_gmac_get_pausefrm_cfg(void *mac_drv, u32 *rx_pause_en, *tx_pause_en = dsaf_get_bit(pause_en, GMAC_PAUSE_EN_TX_FDFC_B); } +static bool hns_gmac_need_adjust_link(void *mac_drv, enum mac_speed speed, + int duplex) +{ + struct mac_driver *drv = (struct mac_driver *)mac_drv; + struct hns_mac_cb *mac_cb = drv->mac_cb; + + return (mac_cb->speed != speed) || + (mac_cb->half_duplex == duplex); +} + static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed, u32 full_duplex) { @@ -309,6 +319,30 @@ static void hns_gmac_set_promisc(void *mac_drv, u8 en) hns_gmac_set_uc_match(mac_drv, en); } +int hns_gmac_wait_fifo_clean(void *mac_drv) +{ + struct mac_driver *drv = (struct mac_driver *)mac_drv; + int wait_cnt; + u32 val; + + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + val = dsaf_read_dev(drv, GMAC_FIFO_STATE_REG); + /* bit5~bit0 is not send complete pkts */ + if ((val & 0x3f) == 0) + break; + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(drv->dev, + "hns ge %d fifo was not idle.\n", drv->mac_id); + return -EBUSY; + } + + return 0; +} + static void hns_gmac_init(void *mac_drv) { u32 port; @@ -690,6 +724,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->mac_disable = hns_gmac_disable; mac_drv->mac_free = hns_gmac_free; mac_drv->adjust_link = hns_gmac_adjust_link; + mac_drv->need_adjust_link = hns_gmac_need_adjust_link; mac_drv->set_tx_auto_pause_frames = hns_gmac_set_tx_auto_pause_frames; mac_drv->config_max_frame_length = hns_gmac_config_max_frame_length; mac_drv->mac_pausefrm_cfg = hns_gmac_pause_frm_cfg; @@ -717,6 +752,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->get_strings = hns_gmac_get_strings; mac_drv->update_stats = hns_gmac_update_stats; mac_drv->set_promiscuous = hns_gmac_set_promisc; + mac_drv->wait_fifo_clean = hns_gmac_wait_fifo_clean; return (void *)mac_drv; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 1c2326bd76e2..6ed6f142427e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -114,6 +114,26 @@ int hns_mac_get_port_info(struct hns_mac_cb *mac_cb, return 0; } +/** + *hns_mac_is_adjust_link - check is need change mac speed and duplex register + *@mac_cb: mac device + *@speed: phy device speed + *@duplex:phy device duplex + * + */ +bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex) +{ + struct mac_driver *mac_ctrl_drv; + + mac_ctrl_drv = (struct mac_driver *)(mac_cb->priv.mac); + + if (mac_ctrl_drv->need_adjust_link) + return mac_ctrl_drv->need_adjust_link(mac_ctrl_drv, + (enum mac_speed)speed, duplex); + else + return true; +} + void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex) { int ret; @@ -430,6 +450,16 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) return 0; } +int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb) +{ + struct mac_driver *drv = hns_mac_get_drv(mac_cb); + + if (drv->wait_fifo_clean) + return drv->wait_fifo_clean(drv); + + return 0; +} + void hns_mac_reset(struct hns_mac_cb *mac_cb) { struct mac_driver *drv = hns_mac_get_drv(mac_cb); @@ -998,6 +1028,20 @@ static int hns_mac_get_max_port_num(struct dsaf_device *dsaf_dev) return DSAF_MAX_PORT_NUM; } +void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode) +{ + struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + + mac_ctrl_drv->mac_enable(mac_cb->priv.mac, mode); +} + +void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode) +{ + struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + + mac_ctrl_drv->mac_disable(mac_cb->priv.mac, mode); +} + /** * hns_mac_init - init mac * @dsaf_dev: dsa fabric device struct pointer diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index bbc0a98e7ca3..fbc75341bef7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -356,6 +356,9 @@ struct mac_driver { /*adjust mac mode of port,include speed and duplex*/ int (*adjust_link)(void *mac_drv, enum mac_speed speed, u32 full_duplex); + /* need adjust link */ + bool (*need_adjust_link)(void *mac_drv, enum mac_speed speed, + int duplex); /* config autoegotaite mode of port*/ void (*set_an_mode)(void *mac_drv, u8 enable); /* config loopbank mode */ @@ -394,6 +397,7 @@ struct mac_driver { void (*get_info)(void *mac_drv, struct mac_info *mac_info); void (*update_stats)(void *mac_drv); + int (*wait_fifo_clean)(void *mac_drv); enum mac_mode mac_mode; u8 mac_id; @@ -427,6 +431,7 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, int hns_mac_init(struct dsaf_device *dsaf_dev); void mac_adjust_link(struct net_device *net_dev); +bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex); void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status); int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr); int hns_mac_set_multi(struct hns_mac_cb *mac_cb, @@ -463,5 +468,8 @@ int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, const unsigned char *addr); int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn); +void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode); +void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode); +int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb); #endif /* _HNS_DSAF_MAC_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index ca50c2553a9c..e557a4ef5996 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2727,6 +2727,35 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX; } +int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port) +{ + u32 val, val_tmp; + int wait_cnt; + + if (port >= DSAF_SERVICE_NW_NUM) + return 0; + + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + val = dsaf_read_dev(dsaf_dev, DSAF_VOQ_IN_PKT_NUM_0_REG + + (port + DSAF_XGE_NUM) * 0x40); + val_tmp = dsaf_read_dev(dsaf_dev, DSAF_VOQ_OUT_PKT_NUM_0_REG + + (port + DSAF_XGE_NUM) * 0x40); + if (val == val_tmp) + break; + + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(dsaf_dev->dev, "hns dsaf clean wait timeout(%u - %u).\n", + val, val_tmp); + return -EBUSY; + } + + return 0; +} + /** * dsaf_probe - probo dsaf dev * @pdev: dasf platform device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 4507e8222683..0e1cd99831a6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -44,6 +44,8 @@ struct hns_mac_cb; #define DSAF_ROCE_CREDIT_CHN 8 #define DSAF_ROCE_CHAN_MODE 3 +#define HNS_MAX_WAIT_CNT 10000 + enum dsaf_roce_port_mode { DSAF_ROCE_6PORT_MODE, DSAF_ROCE_4PORT_MODE, @@ -463,5 +465,6 @@ int hns_dsaf_rm_mac_addr( int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, u8 port_num); +int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port); #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index d160d8c9e45b..0942e4916d9d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -275,6 +275,29 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en) dsaf_write_dev(ppe_cb, PPE_INTEN_REG, msk_vlue & vld_msk); } +int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb) +{ + int wait_cnt; + u32 val; + + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + val = dsaf_read_dev(ppe_cb, PPE_CURR_TX_FIFO0_REG) & 0x3ffU; + if (!val) + break; + + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(ppe_cb->dev, "hns ppe tx fifo clean wait timeout, still has %u pkt.\n", + val); + return -EBUSY; + } + + return 0; +} + /** * ppe_init_hw - init ppe * @ppe_cb: ppe device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index 9d8e643e8aa6..f670e63a5a01 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -100,6 +100,7 @@ struct ppe_common_cb { }; +int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb); int hns_ppe_init(struct dsaf_device *dsaf_dev); void hns_ppe_uninit(struct dsaf_device *dsaf_dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 9d76e2e54f9d..5d64519b9b1d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -66,6 +66,29 @@ void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag) "queue(%d) wait fbd(%d) clean fail!!\n", i, fbd_num); } +int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs) +{ + u32 head, tail; + int wait_cnt; + + tail = dsaf_read_dev(&qs->tx_ring, RCB_REG_TAIL); + wait_cnt = 0; + while (wait_cnt++ < HNS_MAX_WAIT_CNT) { + head = dsaf_read_dev(&qs->tx_ring, RCB_REG_HEAD); + if (tail == head) + break; + + usleep_range(100, 200); + } + + if (wait_cnt >= HNS_MAX_WAIT_CNT) { + dev_err(qs->dev->dev, "rcb wait timeout, head not equal to tail.\n"); + return -EBUSY; + } + + return 0; +} + /** *hns_rcb_reset_ring_hw - ring reset *@q: ring struct pointer diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index 602816498c8d..2319b772a271 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -136,6 +136,7 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag); void hns_rcb_init_hw(struct ring_pair_cb *ring); void hns_rcb_reset_ring_hw(struct hnae_queue *q); void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag); +int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs); u32 hns_rcb_get_rx_coalesced_frames( struct rcb_common_cb *rcb_common, u32 port_idx); u32 hns_rcb_get_tx_coalesced_frames( diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 886cbbf25761..74d935d82cbc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -464,6 +464,7 @@ #define RCB_RING_INTMSK_TX_OVERTIME_REG 0x000C4 #define RCB_RING_INTSTS_TX_OVERTIME_REG 0x000C8 +#define GMAC_FIFO_STATE_REG 0x0000UL #define GMAC_DUPLEX_TYPE_REG 0x0008UL #define GMAC_FD_FC_TYPE_REG 0x000CUL #define GMAC_TX_WATER_LINE_REG 0x0010UL diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 02a0ba20fad5..f56855e63c96 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1112,11 +1112,26 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; + /* If there is no phy, do not need adjust link */ if (ndev->phydev) { - h->dev->ops->adjust_link(h, ndev->phydev->speed, - ndev->phydev->duplex); - state = ndev->phydev->link; + /* When phy link down, do nothing */ + if (ndev->phydev->link == 0) + return; + + if (h->dev->ops->need_adjust_link(h, ndev->phydev->speed, + ndev->phydev->duplex)) { + /* because Hi161X chip don't support to change gmac + * speed and duplex with traffic. Delay 200ms to + * make sure there is no more data in chip FIFO. + */ + netif_carrier_off(ndev); + msleep(200); + h->dev->ops->adjust_link(h, ndev->phydev->speed, + ndev->phydev->duplex); + netif_carrier_on(ndev); + } } + state = state && h->dev->ops->get_status(h); if (state != priv->link) { From 455c4401fe7a538facaffb35b906ce19f1ece474 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Mon, 27 Aug 2018 09:59:30 +0800 Subject: [PATCH 154/242] net: hns: add netif_carrier_off before change speed and duplex If there are packets in hardware when changing the speed or duplex, it may cause hardware hang up. This patch adds netif_carrier_off before change speed and duplex in ethtool_ops.set_link_ksettings, and adds netif_carrier_on after complete the change. Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 08f3c4743f74..774beda040a1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -243,7 +243,9 @@ static int hns_nic_set_link_ksettings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { + netif_carrier_off(net_dev); h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); + netif_carrier_on(net_dev); return 0; } From 6e0bb04d0e4f597d8d8f4f21401a9636f2809fd1 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Mon, 27 Aug 2018 12:42:02 -0500 Subject: [PATCH 155/242] sh_eth: Add R7S9210 support Add support for the R7S9210 which is part of the RZ/A2 series. Signed-off-by: Chris Brandt Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/sh_eth.txt | 1 + drivers/net/ethernet/renesas/sh_eth.c | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 76db9f13ad96..abc36274227c 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -16,6 +16,7 @@ Required properties: "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC. "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC. "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC. + "renesas,ether-r7s9210" if the device is a part of R7S9210 SoC. "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device. "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1 device. diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index ad4433d59237..f27a0dc8c563 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -798,6 +798,41 @@ static struct sh_eth_cpu_data r8a77980_data = { .magic = 1, .cexcr = 1, }; + +/* R7S9210 */ +static struct sh_eth_cpu_data r7s9210_data = { + .soft_reset = sh_eth_soft_reset, + + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate_rcar, + + .register_type = SH_ETH_REG_FAST_SH4, + + .edtrr_trns = EDTRR_TRNS_ETHER, + .ecsr_value = ECSR_ICD, + .ecsipr_value = ECSIPR_ICDIP, + .eesipr_value = EESIPR_TWBIP | EESIPR_TABTIP | EESIPR_RABTIP | + EESIPR_RFCOFIP | EESIPR_ECIIP | EESIPR_FTCIP | + EESIPR_TDEIP | EESIPR_TFUFIP | EESIPR_FRIP | + EESIPR_RDEIP | EESIPR_RFOFIP | EESIPR_CNDIP | + EESIPR_DLCIP | EESIPR_CDIP | EESIPR_TROIP | + EESIPR_RMAFIP | EESIPR_RRFIP | EESIPR_RTLFIP | + EESIPR_RTSFIP | EESIPR_PREIP | EESIPR_CERFIP, + + .tx_check = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_TRO, + .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE | + EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE, + + .fdr_value = 0x0000070f, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .hw_swap = 1, + .rpadir = 1, + .no_ade = 1, + .xdfar_rw = 1, +}; #endif /* CONFIG_OF */ static void sh_eth_set_rate_sh7724(struct net_device *ndev) @@ -3121,6 +3156,7 @@ static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data }, { .compatible = "renesas,gether-r8a77980", .data = &r8a77980_data }, { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data }, + { .compatible = "renesas,ether-r7s9210", .data = &r7s9210_data }, { .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data }, { .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data }, { } From 85eb9af182243ce9a8b72410d5321c440ac5f8d7 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 27 Aug 2018 22:56:22 +0200 Subject: [PATCH 156/242] net/sched: act_pedit: fix dump of extended layered op in the (rare) case of failure in nla_nest_start(), missing NULL checks in tcf_pedit_key_ex_dump() can make the following command # tc action add action pedit ex munge ip ttl set 64 dereference a NULL pointer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 800000007d1cd067 P4D 800000007d1cd067 PUD 7acd3067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 0 PID: 3336 Comm: tc Tainted: G E 4.18.0.pedit+ #425 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_pedit_dump+0x19d/0x358 [act_pedit] Code: be 02 00 00 00 48 89 df 66 89 44 24 20 e8 9b b1 fd e0 85 c0 75 46 8b 83 c8 00 00 00 49 83 c5 08 48 03 83 d0 00 00 00 4d 39 f5 <66> 89 04 25 00 00 00 00 0f 84 81 01 00 00 41 8b 45 00 48 8d 4c 24 RSP: 0018:ffffb5d4004478a8 EFLAGS: 00010246 RAX: ffff8880fcda2070 RBX: ffff8880fadd2900 RCX: 0000000000000000 RDX: 0000000000000002 RSI: ffffb5d4004478ca RDI: ffff8880fcda206e RBP: ffff8880fb9cb900 R08: 0000000000000008 R09: ffff8880fcda206e R10: ffff8880fadd2900 R11: 0000000000000000 R12: ffff8880fd26cf40 R13: ffff8880fc957430 R14: ffff8880fc957430 R15: ffff8880fb9cb988 FS: 00007f75a537a740(0000) GS:ffff8880fda00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007a2fa005 CR4: 00000000001606f0 Call Trace: ? __nla_reserve+0x38/0x50 tcf_action_dump_1+0xd2/0x130 tcf_action_dump+0x6a/0xf0 tca_get_fill.constprop.31+0xa3/0x120 tcf_action_add+0xd1/0x170 tc_ctl_action+0x137/0x150 rtnetlink_rcv_msg+0x263/0x2d0 ? _cond_resched+0x15/0x40 ? rtnl_calcit.isra.30+0x110/0x110 netlink_rcv_skb+0x4d/0x130 netlink_unicast+0x1a3/0x250 netlink_sendmsg+0x2ae/0x3a0 sock_sendmsg+0x36/0x40 ___sys_sendmsg+0x26f/0x2d0 ? do_wp_page+0x8e/0x5f0 ? handle_pte_fault+0x6c3/0xf50 ? __handle_mm_fault+0x38e/0x520 ? __sys_sendmsg+0x5e/0xa0 __sys_sendmsg+0x5e/0xa0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f75a4583ba0 Code: c3 48 8b 05 f2 62 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d fd c3 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ae cc 00 00 48 89 04 24 RSP: 002b:00007fff60ee7418 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff60ee7540 RCX: 00007f75a4583ba0 RDX: 0000000000000000 RSI: 00007fff60ee7490 RDI: 0000000000000003 RBP: 000000005b842d3e R08: 0000000000000002 R09: 0000000000000000 R10: 00007fff60ee6ea0 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff60ee7554 R14: 0000000000000001 R15: 000000000066c100 Modules linked in: act_pedit(E) ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul ext4 crc32_pclmul mbcache ghash_clmulni_intel jbd2 pcbc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer cryptd glue_helper snd joydev pcspkr soundcore virtio_balloon i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk virtio_console failover qxl crc32c_intel drm_kms_helper syscopyarea serio_raw sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix virtio_pci libata virtio_ring i2c_core virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_pedit] CR2: 0000000000000000 Like it's done for other TC actions, give up dumping pedit rules and return an error if nla_nest_start() returns NULL. Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 107034070019..ad99a99f11f6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -109,16 +109,18 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, { struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + if (!keys_start) + goto nla_failure; for (; n > 0; n--) { struct nlattr *key_start; key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + if (!key_start) + goto nla_failure; if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) || - nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) { - nlmsg_trim(skb, keys_start); - return -EINVAL; - } + nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) + goto nla_failure; nla_nest_end(skb, key_start); @@ -128,6 +130,9 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, nla_nest_end(skb, keys_start); return 0; +nla_failure: + nla_nest_cancel(skb, keys_start); + return -EINVAL; } static int tcf_pedit_init(struct net *net, struct nlattr *nla, @@ -418,7 +423,10 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind; if (p->tcfp_keys_ex) { - tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys); + if (tcf_pedit_key_ex_dump(skb, + p->tcfp_keys_ex, + p->tcfp_nkeys)) + goto nla_put_failure; if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt)) goto nla_put_failure; From afe49de44c27a89e8e9631c44b5ffadf6ace65e2 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 28 Aug 2018 13:40:51 +0200 Subject: [PATCH 157/242] ipv6: fix cleanup ordering for ip6_mr failure Commit 15e668070a64 ("ipv6: reorder icmpv6_init() and ip6_mr_init()") moved the cleanup label for ipmr_fail, but should have changed the contents of the cleanup labels as well. Now we can end up cleaning up icmpv6 even though it hasn't been initialized (jump to icmp_fail or ipmr_fail). Simply undo things in the reverse order of their initialization. Example of panic (triggered by faking a failure of icmpv6_init): kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI [...] RIP: 0010:__list_del_entry_valid+0x79/0x160 [...] Call Trace: ? lock_release+0x8a0/0x8a0 unregister_pernet_operations+0xd4/0x560 ? ops_free_list+0x480/0x480 ? down_write+0x91/0x130 ? unregister_pernet_subsys+0x15/0x30 ? down_read+0x1b0/0x1b0 ? up_read+0x110/0x110 ? kmem_cache_create_usercopy+0x1b4/0x240 unregister_pernet_subsys+0x1d/0x30 icmpv6_cleanup+0x1d/0x30 inet6_init+0x1b5/0x23f Fixes: 15e668070a64 ("ipv6: reorder icmpv6_init() and ip6_mr_init()") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 673bba31eb18..e5da133c6932 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1113,11 +1113,11 @@ netfilter_fail: igmp_fail: ndisc_cleanup(); ndisc_fail: - ip6_mr_cleanup(); -icmp_fail: - unregister_pernet_subsys(&inet6_net_ops); -ipmr_fail: icmpv6_cleanup(); +icmp_fail: + ip6_mr_cleanup(); +ipmr_fail: + unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); From a03dc36bdca6b614651fedfcd8559cf914d2d21d Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 28 Aug 2018 13:40:52 +0200 Subject: [PATCH 158/242] ipv6: fix cleanup ordering for pingv6 registration Commit 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.") contains an error in the cleanup path of inet6_init(): when proto_register(&pingv6_prot, 1) fails, we try to unregister &pingv6_prot. When rawv6_init() fails, we skip unregistering &pingv6_prot. Example of panic (triggered by faking a failure of proto_register(&pingv6_prot, 1)): general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI [...] RIP: 0010:__list_del_entry_valid+0x79/0x160 [...] Call Trace: proto_unregister+0xbb/0x550 ? trace_preempt_on+0x6f0/0x6f0 ? sock_no_shutdown+0x10/0x10 inet6_init+0x153/0x1b8 Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e5da133c6932..9a4261e50272 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -938,14 +938,14 @@ static int __init inet6_init(void) err = proto_register(&pingv6_prot, 1); if (err) - goto out_unregister_ping_proto; + goto out_unregister_raw_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ err = rawv6_init(); if (err) - goto out_unregister_raw_proto; + goto out_unregister_ping_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) From f707ef61e17261f2bb18c3e4871c6f135ab3aba9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 28 Aug 2018 13:40:53 +0200 Subject: [PATCH 159/242] net: rtnl: return early from rtnl_unregister_all when protocol isn't registered rtnl_unregister_all(PF_INET6) gets called from inet6_init in cases when no handler has been registered for PF_INET6 yet, for example if ip6_mr_init() fails. Abort and avoid a NULL pointer deref in that case. Example of panic (triggered by faking a failure of register_pernet_subsys): general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI [...] RIP: 0010:rtnl_unregister_all+0x17e/0x2a0 [...] Call Trace: ? rtnetlink_net_init+0x250/0x250 ? sock_unregister+0x103/0x160 ? kernel_getsockopt+0x200/0x200 inet6_init+0x197/0x20d Fixes: e2fddf5e96df ("[IPV6]: Make af_inet6 to check ip6_route_init return value.") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 24431e578310..60c928894a78 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -324,6 +324,10 @@ void rtnl_unregister_all(int protocol) rtnl_lock(); tab = rtnl_msg_handlers[protocol]; + if (!tab) { + rtnl_unlock(); + return; + } RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { link = tab[msgindex]; From c305660b325463cdf3d944492f96155dc931b448 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Tue, 28 Aug 2018 10:19:20 -0500 Subject: [PATCH 160/242] net: stmmac: build the dwmac-socfpga platform driver for Stratix10 The Stratix10 SoC is an AARCH64 based platform that shares the same ethernet controller that is on other SoCFPGA platforms. Build the platform driver. Signed-off-by: Dinh Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index bf4acebb6bcd..324049eebb9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -110,7 +110,7 @@ config DWMAC_ROCKCHIP config DWMAC_SOCFPGA tristate "SOCFPGA dwmac support" - default ARCH_SOCFPGA + default (ARCH_SOCFPGA || ARCH_STRATIX10) depends on OF && (ARCH_SOCFPGA || ARCH_STRATIX10 || COMPILE_TEST) select MFD_SYSCON help From c3c397c1f16c51601a3fac4fe0c63ad8aa85a904 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 28 Aug 2018 12:33:15 -0700 Subject: [PATCH 161/242] net: bcmgenet: use MAC link status for fixed phy When using the fixed PHY with GENET (e.g. MOCA) the PHY link status can be determined from the internal link status captured by the MAC. This allows the PHY state machine to use the correct link state with the fixed PHY even if MAC link event interrupts are missed when the net device is opened. Fixes: 8d88c6ebb34c ("net: bcmgenet: enable MoCA link state change detection") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 3 +++ drivers/net/ethernet/broadcom/genet/bcmmii.c | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index b773bc07edf7..14b49612aa86 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -186,6 +186,9 @@ struct bcmgenet_mib_counters { #define UMAC_MAC1 0x010 #define UMAC_MAX_FRAME_LEN 0x014 +#define UMAC_MODE 0x44 +#define MODE_LINK_STATUS (1 << 5) + #define UMAC_EEE_CTRL 0x064 #define EN_LPI_RX_PAUSE (1 << 0) #define EN_LPI_TX_PFC (1 << 1) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 5333274a283c..4241ae928d4a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -115,8 +115,14 @@ void bcmgenet_mii_setup(struct net_device *dev) static int bcmgenet_fixed_phy_link_update(struct net_device *dev, struct fixed_phy_status *status) { - if (dev && dev->phydev && status) - status->link = dev->phydev->link; + struct bcmgenet_priv *priv; + u32 reg; + + if (dev && dev->phydev && status) { + priv = netdev_priv(dev); + reg = bcmgenet_umac_readl(priv, UMAC_MODE); + status->link = !!(reg & MODE_LINK_STATUS); + } return 0; } From c4053ef322081554765e1b708d6cdd8855e1d72d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 29 Aug 2018 09:44:39 +0300 Subject: [PATCH 162/242] net: mvpp2: initialize port of_node pointer Without a valid of_node in struct device we can't find the mvpp2 port device by its DT node. Specifically, this breaks of_find_net_device_by_node(). For example, the Armada 8040 based Clearfog GT-8K uses Marvell 88E6141 switch connected to the &cp1_eth2 port: &cp1_mdio { ... switch0: switch0@4 { compatible = "marvell,mv88e6085"; ... ports { ... port@5 { reg = <5>; label = "cpu"; ethernet = <&cp1_eth2>; }; }; }; }; Without this patch, dsa_register_switch() returns -EPROBE_DEFER because of_find_net_device_by_node() can't find the device_node of the &cp1_eth2 device. Reviewed-by: Andrew Lunn Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 32d785b616e1..28500417843e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4803,6 +4803,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->min_mtu = ETH_MIN_MTU; /* 9704 == 9728 - 20 and rounding to 8 */ dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE; + dev->dev.of_node = port_node; /* Phylink isn't used w/ ACPI as of now */ if (port_node) { From 97763dc0f4010bc20e2969a6bf9a40a2551c4f79 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 29 Aug 2018 10:22:33 +0200 Subject: [PATCH 163/242] net_sched: reject unknown tcfa_action values After the commit 802bfb19152c ("net/sched: user-space can't set unknown tcfa_action values"), unknown tcfa_action values are converted to TC_ACT_UNSPEC, but the common agreement is instead rejecting such configurations. This change also introduces a helper to simplify the destruction of a single action, avoiding code duplication. v1 -> v2: - helper is now static and renamed according to act_* convention - updated extack message, according to the new behavior Fixes: 802bfb19152c ("net/sched: user-space can't set unknown tcfa_action values") Signed-off-by: Paolo Abeni Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_api.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index db83dac1e7f4..316c98bb87e4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -662,6 +662,13 @@ int tcf_action_destroy(struct tc_action *actions[], int bind) return ret; } +static int tcf_action_destroy_1(struct tc_action *a, int bind) +{ + struct tc_action *actions[] = { a, NULL }; + + return tcf_action_destroy(actions, bind); +} + static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); @@ -881,17 +888,16 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { err = tcf_action_goto_chain_init(a, tp); if (err) { - struct tc_action *actions[] = { a, NULL }; - - tcf_action_destroy(actions, bind); + tcf_action_destroy_1(a, bind); NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); return ERR_PTR(err); } } if (!tcf_action_valid(a->tcfa_action)) { - NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead"); - a->tcfa_action = TC_ACT_UNSPEC; + tcf_action_destroy_1(a, bind); + NL_SET_ERR_MSG(extack, "Invalid control action value"); + return ERR_PTR(-EINVAL); } return a; From 25a8238f4cc8425d4aade4f9041be468d0e8aa2e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 29 Aug 2018 10:22:34 +0200 Subject: [PATCH 164/242] tc-testing: add test-cases for numeric and invalid control action Only the police action allows us to specify an arbitrary numeric value for the control action. This change introduces an explicit test case for the above feature and then leverage it for testing the kernel behavior for invalid control actions (reject). Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/actions/police.json | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index f03763d81617..30f9b54bd666 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -312,6 +312,54 @@ "$TC actions flush action police" ] }, + { + "id": "6aaf", + "name": "Add police actions with conform-exceed control pass/pipe [with numeric values]", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 0/3 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 1", + "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "29b1", + "name": "Add police actions with conform-exceed control /drop", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 10/drop index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, { "id": "c26f", "name": "Add police action with invalid peakrate value", From 4f0223bfe9c3e62d8f45a85f1ef1b18a8a263ef9 Mon Sep 17 00:00:00 2001 From: Arunk Khandavalli Date: Thu, 30 Aug 2018 00:40:16 +0300 Subject: [PATCH 165/242] cfg80211: nl80211_update_ft_ies() to validate NL80211_ATTR_IE nl80211_update_ft_ies() tried to validate NL80211_ATTR_IE with is_valid_ie_attr() before dereferencing it, but that helper function returns true in case of NULL pointer (i.e., attribute not included). This can result to dereferencing a NULL pointer. Fix that by explicitly checking that NL80211_ATTR_IE is included. Fixes: 355199e02b83 ("cfg80211: Extend support for IEEE 802.11r Fast BSS Transition") Signed-off-by: Arunk Khandavalli Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ce0149a86c13..733ccf867972 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12099,6 +12099,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MDID] || + !info->attrs[NL80211_ATTR_IE] || !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; From 1eb507903665442360a959136dfa3234c43db085 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 29 Aug 2018 21:03:25 +0200 Subject: [PATCH 166/242] mac80211: do not convert to A-MSDU if frag/subframe limited Do not start to aggregate packets in a A-MSDU frame (converting the first subframe to A-MSDU, adding the header) if max_tx_fragments or max_amsdu_subframes limits are already exceeded by it. In particular, this happens when drivers set the limit to 1 to avoid A-MSDUs at all. Signed-off-by: Lorenzo Bianconi [reword commit message to be more precise] Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 667a73d6eb5c..1aac5e3c7eee 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3208,9 +3208,6 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (skb->len + head->len > max_amsdu_len) goto out; - if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) - goto out; - nfrags = 1 + skb_shinfo(skb)->nr_frags; nfrags += 1 + skb_shinfo(head)->nr_frags; frag_tail = &skb_shinfo(head)->frag_list; @@ -3226,6 +3223,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (max_frags && nfrags > max_frags) goto out; + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + /* * Pad out the previous subframe to a multiple of 4 by adding the * padding to the next one, that's being added. Note that head->len From aa58acf325b4aadeecae2bfc90658273b47dbace Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Aug 2018 10:55:49 +0200 Subject: [PATCH 167/242] mac80211: always account for A-MSDU header changes In the error path of changing the SKB headroom of the second A-MSDU subframe, we would not account for the already-changed length of the first frame that just got converted to be in A-MSDU format and thus is a bit longer now. Fix this by doing the necessary accounting. It would be possible to reorder the operations, but that would make the code more complex (to calculate the necessary pad), and the headroom expansion should not fail frequently enough to make that worthwhile. Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") Signed-off-by: Johannes Berg Acked-by: Lorenzo Bianconi Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1aac5e3c7eee..6ca0865de945 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3239,7 +3239,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2 + pad)) - goto out; + goto out_recalc; ret = true; data = skb_push(skb, ETH_ALEN + 2); @@ -3256,11 +3256,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, head->data_len += skb->len; *frag_tail = skb; - flow->backlog += head->len - orig_len; - tin->backlog_bytes += head->len - orig_len; - - fq_recalc_backlog(fq, tin, flow); +out_recalc: + if (head->len != orig_len) { + flow->backlog += head->len - orig_len; + tin->backlog_bytes += head->len - orig_len; + fq_recalc_backlog(fq, tin, flow); + } out: spin_unlock_bh(&fq->lock); From 36bf9da2913054c218337d8cd7cb11bddc1fafb0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Aug 2018 14:45:14 +0900 Subject: [PATCH 168/242] x86/build: Remove jump label quirk for GCC older than 4.5.2 Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6") bumped the minimum GCC version to 4.6 for all architectures. Remove the workaround code. It was the only user of cc-if-fullversion. Remove the macro as well. Signed-off-by: Masahiro Yamada Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Michal Marek Cc: linux-kbuild@vger.kernel.org Link: https://lkml.kernel.org/r/1535348714-25457-1-git-send-email-yamada.masahiro@socionext.com --- arch/x86/Makefile | 12 ------------ scripts/Kbuild.include | 4 ---- 2 files changed, 16 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 94859241bc3e..8fc8f94ef5f5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -179,18 +179,6 @@ ifndef CC_HAVE_ASM_GOTO $(error Compiler lacks asm-goto support.) endif -# -# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a -# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way -# to test for this bug at compile-time because the test case needs to execute, -# which is a no-go for cross compilers. So check the GCC version instead. -# -ifdef CONFIG_JUMP_LABEL - ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1) - ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1) - endif -endif - ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) # This compiler flag is not supported by Clang: KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c75413d05a63..ce53639a864a 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -153,10 +153,6 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \ # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) -# cc-if-fullversion -# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1) -cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4)) - # cc-ldoption # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) cc-ldoption = $(call try-run,\ From 1f59a4581b5ecfe9b4f049a7a2cf904d8352842d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 27 Aug 2018 14:40:09 -0700 Subject: [PATCH 169/242] x86/irqflags: Mark native_restore_fl extern inline This should have been marked extern inline in order to pick up the out of line definition in arch/x86/kernel/irqflags.S. Fixes: 208cbb325589 ("x86/irqflags: Provide a declaration for native_save_fl") Reported-by: Ben Hutchings Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: "H. Peter Anvin" Cc: Boris Ostrovsky Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180827214011.55428-1-ndesaulniers@google.com --- arch/x86/include/asm/irqflags.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c14f2a74b2be..15450a675031 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void) return flags; } -static inline void native_restore_fl(unsigned long flags) +extern inline void native_restore_fl(unsigned long flags); +extern inline void native_restore_fl(unsigned long flags) { asm volatile("push %0 ; popf" : /* no output */ From f12d11c5c184626b4befdee3d573ec8237405a33 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 28 Aug 2018 20:40:33 +0200 Subject: [PATCH 170/242] x86/entry/64: Wipe KASAN stack shadow before rewind_stack_do_exit() Reset the KASAN shadow state of the task stack before rewinding RSP. Without this, a kernel oops will leave parts of the stack poisoned, and code running under do_exit() can trip over such poisoned regions and cause nonsensical false-positive KASAN reports about stack-out-of-bounds bugs. This does not wipe the exception stacks; if an oops happens on an exception stack, it might result in random KASAN false-positives from other tasks afterwards. This is probably relatively uninteresting, since if the kernel oopses on an exception stack, there are most likely bigger things to worry about. It'd be more interesting if vmapped stacks and KASAN were compatible, since then handle_stack_overflow() would oops from exception stack context. Fixes: 2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Acked-by: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Kees Cook Cc: kasan-dev@googlegroups.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180828184033.93712-1-jannh@google.com --- arch/x86/kernel/dumpstack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9c8652974f8e..1596e6bfea6f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -346,7 +347,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) * We're not going to return, but we might be on an IST stack or * have very little stack space left. Rewind the stack and kill * the task. + * Before we rewind the stack, we have to tell KASAN that we're going to + * reuse the task stack and that existing poisons are invalid. */ + kasan_unpoison_task_stack(current); rewind_stack_do_exit(signr); } NOKPROBE_SYMBOL(oops_end); From cb9d7fd51d9fbb329d182423bd7b92d0f8cb0e01 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 21 Aug 2018 17:25:07 +0200 Subject: [PATCH 171/242] watchdog: Mark watchdog touch functions as notrace Some architectures need to use stop_machine() to patch functions for ftrace, and the assumption is that the stopped CPUs do not make function calls to traceable functions when they are in the stopped state. Commit ce4f06dcbb5d ("stop_machine: Touch_nmi_watchdog() after MULTI_STOP_PREPARE") added calls to the watchdog touch functions from the stopped CPUs and those functions lack notrace annotations. This leads to crashes when enabling/disabling ftrace on ARM kernels built with the Thumb-2 instruction set. Fix it by adding the necessary notrace annotations. Fixes: ce4f06dcbb5d ("stop_machine: Touch_nmi_watchdog() after MULTI_STOP_PREPARE") Signed-off-by: Vincent Whitchurch Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: oleg@redhat.com Cc: tj@kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180821152507.18313-1-vincent.whitchurch@axis.com --- kernel/watchdog.c | 4 ++-- kernel/watchdog_hld.c | 2 +- kernel/workqueue.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5470dce212c0..977918d5d350 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -261,7 +261,7 @@ static void __touch_watchdog(void) * entering idle state. This should only be used for scheduler events. * Use touch_softlockup_watchdog() for everything else. */ -void touch_softlockup_watchdog_sched(void) +notrace void touch_softlockup_watchdog_sched(void) { /* * Preemption can be enabled. It doesn't matter which CPU's timestamp @@ -270,7 +270,7 @@ void touch_softlockup_watchdog_sched(void) raw_cpu_write(watchdog_touch_ts, 0); } -void touch_softlockup_watchdog(void) +notrace void touch_softlockup_watchdog(void) { touch_softlockup_watchdog_sched(); wq_watchdog_touch(raw_smp_processor_id()); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 1f7020d65d0a..71381168dede 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -29,7 +29,7 @@ static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; static atomic_t watchdog_cpus = ATOMIC_INIT(0); -void arch_touch_nmi_watchdog(void) +notrace void arch_touch_nmi_watchdog(void) { /* * Using __raw here because some code paths have diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 60e80198c3df..0280deac392e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5574,7 +5574,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) mod_timer(&wq_watchdog_timer, jiffies + thresh); } -void wq_watchdog_touch(int cpu) +notrace void wq_watchdog_touch(int cpu) { if (cpu >= 0) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; From 13ba17bee18e321b073b49a88dcab10881f757da Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Fri, 24 Aug 2018 18:03:53 +0530 Subject: [PATCH 172/242] notifier: Remove notifier header file wherever not used The conversion of the hotplug notifiers to a state machine left the notifier.h includes around in some places. Remove them. Signed-off-by: Mukesh Ojha Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1535114033-4605-1-git-send-email-mojha@codeaurora.org --- fs/buffer.c | 1 - kernel/printk/printk.c | 1 - lib/percpu_counter.c | 1 - mm/page-writeback.c | 1 - mm/page_alloc.c | 1 - mm/slub.c | 1 - net/core/dev.c | 1 - 7 files changed, 7 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 4cc679d5bf58..6f1ae3ac9789 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 924e37fb1620..fd6f8ed28e01 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index c72577e472f2..a66595ba5543 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6551d3b0dc30..84ae9bf5858a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e75865d58ba7..05e983f42316 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/slub.c b/mm/slub.c index ce2b9e5cea77..8da34a8af53d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -19,7 +19,6 @@ #include #include "slab.h" #include -#include #include #include #include diff --git a/net/core/dev.c b/net/core/dev.c index 325fc5088370..82114e1111e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -93,7 +93,6 @@ #include #include #include -#include #include #include #include From 113fc08357ad4e8b84caa75402430875d9ac4c1a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Aug 2018 12:39:43 +0900 Subject: [PATCH 173/242] objtool: Remove workaround for unreachable warnings from old GCC Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6") bumped the minimum GCC version to 4.6 for all architectures. This effectively reverts commit da541b20021c ("objtool: Skip unreachable warnings for GCC 4.4 and older"), which was a workaround for GCC 4.4 or older. Signed-off-by: Masahiro Yamada Signed-off-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Peter Zijlstra Cc: Michal Marek Cc: linux-kbuild@vger.kernel.org Link: https://lkml.kernel.org/r/1535341183-19994-1-git-send-email-yamada.masahiro@socionext.com --- scripts/Makefile.build | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 1c48572223d1..5a2d1c9578a0 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -246,8 +246,6 @@ objtool_args += --no-fp endif ifdef CONFIG_GCOV_KERNEL objtool_args += --no-unreachable -else -objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif ifdef CONFIG_RETPOLINE ifneq ($(RETPOLINE_CFLAGS),) From 9222f606506c5f8ca2c8b8c939d59ed3e6ac4148 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 28 Aug 2018 08:55:14 +0200 Subject: [PATCH 174/242] x86/alternatives: Lockdep-enforce text_mutex in text_poke*() text_poke() and text_poke_bp() must be called with text_mutex held. Put proper lockdep anotation in place instead of just mentioning the requirement in a comment. Reported-by: Peter Zijlstra Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1808280853520.25787@cbobk.fhfr.pm --- arch/x86/kernel/alternative.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 014f214da581..b9d5e7c9ef43 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -684,8 +684,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, * It means the size must be writable atomically and the address must be aligned * in a way that permits an atomic write. It also makes sure we fit on a single * page. - * - * Note: Must be called under text_mutex. */ void *text_poke(void *addr, const void *opcode, size_t len) { @@ -700,6 +698,8 @@ void *text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(!after_bootmem); + lockdep_assert_held(&text_mutex); + if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); @@ -782,8 +782,6 @@ int poke_int3_handler(struct pt_regs *regs) * - replace the first byte (int3) by the first byte of * replacing opcode * - sync cores - * - * Note: must be called under text_mutex. */ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) { @@ -792,6 +790,9 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) bp_int3_handler = handler; bp_int3_addr = (u8 *)addr + sizeof(int3); bp_patching_in_progress = true; + + lockdep_assert_held(&text_mutex); + /* * Corresponding read barrier in int3 notifier for making sure the * in_progress and handler are correctly ordered wrt. patching. From 26e609eccd37967d3681662433086894830c5d62 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 14 Aug 2018 18:59:51 +0200 Subject: [PATCH 175/242] x86/asm: Use CC_SET()/CC_OUT() in __gen_sigismember() Replace open-coded set instructions with CC_SET()/CC_OUT(). Signed-off-by: Uros Bizjak Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180814165951.13538-1-ubizjak@gmail.com --- arch/x86/include/asm/signal.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 5f9012ff52ed..33d3c88a7225 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs); #define __ARCH_HAS_SA_RESTORER +#include #include #ifdef __i386__ @@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - unsigned char ret; - asm("btl %2,%1\n\tsetc %0" - : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + bool ret; + asm("btl %2,%1" CC_SET(c) + : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); return ret; } From 755a8bf5579d22eb5636685c516d8dede799e27b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Aug 2018 15:08:30 +0100 Subject: [PATCH 176/242] arm/arm64: smccc-1.1: Handle function result as parameters If someone has the silly idea to write something along those lines: extern u64 foo(void); void bar(struct arm_smccc_res *res) { arm_smccc_1_1_smc(0xbad, foo(), res); } they are in for a surprise, as this gets compiled as: 0000000000000588 : 588: a9be7bfd stp x29, x30, [sp, #-32]! 58c: 910003fd mov x29, sp 590: f9000bf3 str x19, [sp, #16] 594: aa0003f3 mov x19, x0 598: aa1e03e0 mov x0, x30 59c: 94000000 bl 0 <_mcount> 5a0: 94000000 bl 0 5a4: aa0003e1 mov x1, x0 5a8: d4000003 smc #0x0 5ac: b4000073 cbz x19, 5b8 5b0: a9000660 stp x0, x1, [x19] 5b4: a9010e62 stp x2, x3, [x19, #16] 5b8: f9400bf3 ldr x19, [sp, #16] 5bc: a8c27bfd ldp x29, x30, [sp], #32 5c0: d65f03c0 ret 5c4: d503201f nop The call to foo "overwrites" the x0 register for the return value, and we end up calling the wrong secure service. A solution is to evaluate all the parameters before assigning anything to specific registers, leading to the expected result: 0000000000000588 : 588: a9be7bfd stp x29, x30, [sp, #-32]! 58c: 910003fd mov x29, sp 590: f9000bf3 str x19, [sp, #16] 594: aa0003f3 mov x19, x0 598: aa1e03e0 mov x0, x30 59c: 94000000 bl 0 <_mcount> 5a0: 94000000 bl 0 5a4: aa0003e1 mov x1, x0 5a8: d28175a0 mov x0, #0xbad 5ac: d4000003 smc #0x0 5b0: b4000073 cbz x19, 5bc 5b4: a9000660 stp x0, x1, [x19] 5b8: a9010e62 stp x2, x3, [x19, #16] 5bc: f9400bf3 ldr x19, [sp, #16] 5c0: a8c27bfd ldp x29, x30, [sp], #32 5c4: d65f03c0 ret Reported-by: Julien Grall Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 5a91ff33720b..18863d56273c 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -205,41 +205,51 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register unsigned long r3 asm("r3") #define __declare_arg_1(a0, a1, res) \ + typeof(a1) __a1 = a1; \ struct arm_smccc_res *___res = res; \ register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = a1; \ + register unsigned long r1 asm("r1") = __a1; \ register unsigned long r2 asm("r2"); \ register unsigned long r3 asm("r3") #define __declare_arg_2(a0, a1, a2, res) \ + typeof(a1) __a1 = a1; \ + typeof(a2) __a2 = a2; \ struct arm_smccc_res *___res = res; \ register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = a1; \ - register unsigned long r2 asm("r2") = a2; \ + register unsigned long r1 asm("r1") = __a1; \ + register unsigned long r2 asm("r2") = __a2; \ register unsigned long r3 asm("r3") #define __declare_arg_3(a0, a1, a2, a3, res) \ + typeof(a1) __a1 = a1; \ + typeof(a2) __a2 = a2; \ + typeof(a3) __a3 = a3; \ struct arm_smccc_res *___res = res; \ register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = a1; \ - register unsigned long r2 asm("r2") = a2; \ - register unsigned long r3 asm("r3") = a3 + register unsigned long r1 asm("r1") = __a1; \ + register unsigned long r2 asm("r2") = __a2; \ + register unsigned long r3 asm("r3") = __a3 #define __declare_arg_4(a0, a1, a2, a3, a4, res) \ + typeof(a4) __a4 = a4; \ __declare_arg_3(a0, a1, a2, a3, res); \ - register typeof(a4) r4 asm("r4") = a4 + register unsigned long r4 asm("r4") = __a4 #define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ + typeof(a5) __a5 = a5; \ __declare_arg_4(a0, a1, a2, a3, a4, res); \ - register typeof(a5) r5 asm("r5") = a5 + register unsigned long r5 asm("r5") = __a5 #define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ + typeof(a6) __a6 = a6; \ __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ - register typeof(a6) r6 asm("r6") = a6 + register unsigned long r6 asm("r6") = __a6 #define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ + typeof(a7) __a7 = a7; \ __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ - register typeof(a7) r7 asm("r7") = a7 + register unsigned long r7 asm("r7") = __a7 #define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) #define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) From f42b0e18f2e5cf34f73ef1b6327b49040b307a33 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 27 Aug 2018 07:50:47 -0500 Subject: [PATCH 177/242] of: add node name compare helper functions In preparation to remove device_node.name pointer, add helper functions for node name comparisons which are a common pattern throughout the kernel. Cc: Frank Rowand Signed-off-by: Rob Herring --- drivers/of/base.c | 22 ++++++++++++++++++++++ include/linux/of.h | 13 +++++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index bc420d2aa5f5..9095b8290150 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -54,6 +54,28 @@ DEFINE_MUTEX(of_mutex); */ DEFINE_RAW_SPINLOCK(devtree_lock); +bool of_node_name_eq(const struct device_node *np, const char *name) +{ + const char *node_name; + size_t len; + + if (!np) + return false; + + node_name = kbasename(np->full_name); + len = strchrnul(node_name, '@') - node_name; + + return (strlen(name) == len) && (strncmp(node_name, name, len) == 0); +} + +bool of_node_name_prefix(const struct device_node *np, const char *prefix) +{ + if (!np) + return false; + + return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0; +} + int of_n_addr_cells(struct device_node *np) { u32 cells; diff --git a/include/linux/of.h b/include/linux/of.h index b99a1a8c2952..688c52dd7b3e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -256,6 +256,9 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) +extern bool of_node_name_eq(const struct device_node *np, const char *name); +extern bool of_node_name_prefix(const struct device_node *np, const char *prefix); + static inline const char *of_node_full_name(const struct device_node *np) { return np ? np->full_name : ""; @@ -563,6 +566,16 @@ static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode) return NULL; } +static inline bool of_node_name_eq(const struct device_node *np, const char *name) +{ + return false; +} + +static inline bool of_node_name_prefix(const struct device_node *np, const char *prefix) +{ + return false; +} + static inline const char* of_node_full_name(const struct device_node *np) { return ""; From 7fd6d98b89f382d414e1db528e29a67bbd749457 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 30 Aug 2018 11:50:13 +0300 Subject: [PATCH 178/242] i2c: i801: Allow ACPI AML access I/O ports not reserved for SMBus Commit 7ae81952cda ("i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR") made it possible for AML code to access SMBus I/O ports by installing custom SystemIO OpRegion handler and blocking i80i driver access upon first AML read/write to this OpRegion. However, while ThinkPad T560 does have SystemIO OpRegion declared under the SMBus device, it does not access any of the SMBus registers: Device (SMBU) { ... OperationRegion (SMBP, PCI_Config, 0x50, 0x04) Field (SMBP, DWordAcc, NoLock, Preserve) { , 5, TCOB, 11, Offset (0x04) } Name (TCBV, 0x00) Method (TCBS, 0, NotSerialized) { If ((TCBV == 0x00)) { TCBV = (\_SB.PCI0.SMBU.TCOB << 0x05) } Return (TCBV) /* \_SB_.PCI0.SMBU.TCBV */ } OperationRegion (TCBA, SystemIO, TCBS (), 0x10) Field (TCBA, ByteAcc, NoLock, Preserve) { Offset (0x04), , 9, CPSC, 1 } } Problem with the current approach is that it blocks all I/O port access and because this system has touchpad connected to the SMBus controller after first AML access (happens during suspend/resume cycle) the touchpad fails to work anymore. Fix this so that we allow ACPI AML I/O port access if it does not touch the region reserved for the SMBus. Fixes: 7ae81952cda ("i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR") Link: https://bugzilla.kernel.org/show_bug.cgi?id=200737 Reported-by: Yussuf Khalil Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 941c223f6491..04b60a349d7e 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1415,6 +1415,13 @@ static void i801_add_tco(struct i801_priv *priv) } #ifdef CONFIG_ACPI +static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv, + acpi_physical_address address) +{ + return address >= priv->smba && + address <= pci_resource_end(priv->pci_dev, SMBBAR); +} + static acpi_status i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, u64 *value, void *handler_context, void *region_context) @@ -1430,7 +1437,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, */ mutex_lock(&priv->acpi_lock); - if (!priv->acpi_reserved) { + if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) { priv->acpi_reserved = true; dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); From 9d9a152ebaa86a9dede4624919566483c955d0a7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 29 Aug 2018 15:06:31 +0200 Subject: [PATCH 179/242] i2c: designware: Re-init controllers with pm_disabled set on resume On Bay Trail and Cherry Trail devices we set the pm_disabled flag for I2C busses which the OS shares with the PUNIT as these need special handling. Until now we called dev_pm_syscore_device(dev, true) for I2C controllers with this flag set to keep these I2C controllers always on. After commit 12864ff8545f ("ACPI / LPSS: Avoid PM quirks on suspend and resume from hibernation"), this no longer works. This commit modifies lpss_iosf_exit_d3_state() to only run if lpss_iosf_enter_d3_state() has ran before it, so that it does not run on a resume from hibernate (or from S3). On these systems the conditions for lpss_iosf_enter_d3_state() to run never become true, so lpss_iosf_exit_d3_state() never gets called and the 2 LPSS DMA controllers never get forced into D0 mode, instead they are left in their default automatic power-on when needed mode. The not forcing of D0 mode for the DMA controllers enables these systems to properly enter S0ix modes, which is a good thing. But after entering S0ix modes the I2C controller connected to the PMIC no longer works, leading to e.g. broken battery monitoring. The _PS3 method for this I2C controller looks like this: Method (_PS3, 0, NotSerialized) // _PS3: Power State 3 { If ((((PMID == 0x04) || (PMID == 0x05)) || (PMID == 0x06))) { Return (Zero) } PSAT |= 0x03 Local0 = PSAT /* \_SB_.I2C5.PSAT */ } Where PMID = 0x05, so we enter the Return (Zero) path on these systems. So even if we were to not call dev_pm_syscore_device(dev, true) the I2C controller will be left in D0 rather then be switched to D3. Yet on other Bay and Cherry Trail devices S0ix is not entered unless *all* I2C controllers are in D3 mode. This combined with the I2C controller no longer working now that we reach S0ix states on these systems leads to me believing that the PUNIT itself puts the I2C controller in D3 when all other conditions for entering S0ix states are true. Since now the I2C controller is put in D3 over a suspend/resume we must re-initialize it afterwards and that does indeed fix it no longer working. This commit implements this fix by: 1) Making the suspend_late callback a no-op if pm_disabled is set and making the resume_early callback skip the clock re-enable (since it now was not disabled) while still doing the necessary I2C controller re-init. 2) Removing the dev_pm_syscore_device(dev, true) call, so that the suspend and resume callbacks are actually called. Normally this would cause the ACPI pm code to call _PS3 putting the I2C controller in D3, wreaking havoc since it is shared with the PUNIT, but in this special case the _PS3 method is a no-op so we can safely allow a "fake" suspend / resume. Fixes: 12864ff8545f ("ACPI / LPSS: Avoid PM quirks on suspend and resume ...") Link: https://bugzilla.kernel.org/show_bug.cgi?id=200861 Cc: 4.15+ # 4.15+ Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 1 - drivers/i2c/busses/i2c-designware-platdrv.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index e18442b9973a..94d94b4a9a0d 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -708,7 +708,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev) i2c_set_adapdata(adap, dev); if (dev->pm_disabled) { - dev_pm_syscore_device(dev->dev, true); irq_flags = IRQF_NO_SUSPEND; } else { irq_flags = IRQF_SHARED | IRQF_COND_SUSPEND; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 1a8d2da5b000..b5750fd85125 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -434,6 +434,9 @@ static int dw_i2c_plat_suspend(struct device *dev) { struct dw_i2c_dev *i_dev = dev_get_drvdata(dev); + if (i_dev->pm_disabled) + return 0; + i_dev->disable(i_dev); i2c_dw_prepare_clk(i_dev, false); @@ -444,7 +447,9 @@ static int dw_i2c_plat_resume(struct device *dev) { struct dw_i2c_dev *i_dev = dev_get_drvdata(dev); - i2c_dw_prepare_clk(i_dev, true); + if (!i_dev->pm_disabled) + i2c_dw_prepare_clk(i_dev, true); + i_dev->init(i_dev); return 0; From 1204d12a494cf5dff497859a5febf2ae30a28970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Tue, 28 Aug 2018 10:07:40 +0200 Subject: [PATCH 180/242] i2c: algos: bit: make the error messages grepable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yep, I went looking for one of these, and I wasn't able to find it easily. That's worse than a line which is 82-chars long, IMHO. Signed-off-by: Jan Kundrát Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-bit.c | 55 ++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index 6ec65adaba49..c33dcfb87993 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -110,8 +110,8 @@ static int sclhi(struct i2c_algo_bit_data *adap) } #ifdef DEBUG if (jiffies != start && i2c_debug >= 3) - pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go " - "high\n", jiffies - start); + pr_debug("i2c-algo-bit: needed %ld jiffies for SCL to go high\n", + jiffies - start); #endif done: @@ -171,8 +171,9 @@ static int i2c_outb(struct i2c_adapter *i2c_adap, unsigned char c) setsda(adap, sb); udelay((adap->udelay + 1) / 2); if (sclhi(adap) < 0) { /* timed out */ - bit_dbg(1, &i2c_adap->dev, "i2c_outb: 0x%02x, " - "timeout at bit #%d\n", (int)c, i); + bit_dbg(1, &i2c_adap->dev, + "i2c_outb: 0x%02x, timeout at bit #%d\n", + (int)c, i); return -ETIMEDOUT; } /* FIXME do arbitration here: @@ -185,8 +186,8 @@ static int i2c_outb(struct i2c_adapter *i2c_adap, unsigned char c) } sdahi(adap); if (sclhi(adap) < 0) { /* timeout */ - bit_dbg(1, &i2c_adap->dev, "i2c_outb: 0x%02x, " - "timeout at ack\n", (int)c); + bit_dbg(1, &i2c_adap->dev, + "i2c_outb: 0x%02x, timeout at ack\n", (int)c); return -ETIMEDOUT; } @@ -215,8 +216,9 @@ static int i2c_inb(struct i2c_adapter *i2c_adap) sdahi(adap); for (i = 0; i < 8; i++) { if (sclhi(adap) < 0) { /* timeout */ - bit_dbg(1, &i2c_adap->dev, "i2c_inb: timeout at bit " - "#%d\n", 7 - i); + bit_dbg(1, &i2c_adap->dev, + "i2c_inb: timeout at bit #%d\n", + 7 - i); return -ETIMEDOUT; } indata *= 2; @@ -265,8 +267,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!scl) { - printk(KERN_WARNING "%s: SCL unexpected low " - "while pulling SDA low!\n", name); + printk(KERN_WARNING + "%s: SCL unexpected low while pulling SDA low!\n", + name); goto bailout; } @@ -278,8 +281,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!scl) { - printk(KERN_WARNING "%s: SCL unexpected low " - "while pulling SDA high!\n", name); + printk(KERN_WARNING + "%s: SCL unexpected low while pulling SDA high!\n", + name); goto bailout; } @@ -291,8 +295,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!sda) { - printk(KERN_WARNING "%s: SDA unexpected low " - "while pulling SCL low!\n", name); + printk(KERN_WARNING + "%s: SDA unexpected low while pulling SCL low!\n", + name); goto bailout; } @@ -304,8 +309,9 @@ static int test_bus(struct i2c_adapter *i2c_adap) goto bailout; } if (!sda) { - printk(KERN_WARNING "%s: SDA unexpected low " - "while pulling SCL high!\n", name); + printk(KERN_WARNING + "%s: SDA unexpected low while pulling SCL high!\n", + name); goto bailout; } @@ -352,8 +358,8 @@ static int try_address(struct i2c_adapter *i2c_adap, i2c_start(adap); } if (i && ret) - bit_dbg(1, &i2c_adap->dev, "Used %d tries to %s client at " - "0x%02x: %s\n", i + 1, + bit_dbg(1, &i2c_adap->dev, + "Used %d tries to %s client at 0x%02x: %s\n", i + 1, addr & 1 ? "read from" : "write to", addr >> 1, ret == 1 ? "success" : "failed, timeout?"); return ret; @@ -442,8 +448,9 @@ static int readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msg) if (inval <= 0 || inval > I2C_SMBUS_BLOCK_MAX) { if (!(flags & I2C_M_NO_RD_ACK)) acknak(i2c_adap, 0); - dev_err(&i2c_adap->dev, "readbytes: invalid " - "block length (%d)\n", inval); + dev_err(&i2c_adap->dev, + "readbytes: invalid block length (%d)\n", + inval); return -EPROTO; } /* The original count value accounts for the extra @@ -506,8 +513,8 @@ static int bit_doAddress(struct i2c_adapter *i2c_adap, struct i2c_msg *msg) return -ENXIO; } if (flags & I2C_M_RD) { - bit_dbg(3, &i2c_adap->dev, "emitting repeated " - "start condition\n"); + bit_dbg(3, &i2c_adap->dev, + "emitting repeated start condition\n"); i2c_repstart(adap); /* okay, now switch into reading mode */ addr |= 0x01; @@ -564,8 +571,8 @@ static int bit_xfer(struct i2c_adapter *i2c_adap, } ret = bit_doAddress(i2c_adap, pmsg); if ((ret != 0) && !nak_ok) { - bit_dbg(1, &i2c_adap->dev, "NAK from " - "device addr 0x%02x msg #%d\n", + bit_dbg(1, &i2c_adap->dev, + "NAK from device addr 0x%02x msg #%d\n", msgs[i].addr, i); goto bailout; } From 82fe39a6bc7b866fc3ffd838e3c5a4cadb328b04 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 24 Aug 2018 16:52:44 +0200 Subject: [PATCH 181/242] i2c: refactor function to release a DMA safe buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit a) rename to 'put' instead of 'release' to match 'get' when obtaining the buffer b) change the argument order to have the buffer as first argument c) add a new argument telling the function if the message was transferred. This allows the function to be used also in cases where setting up DMA failed, so the buffer needs to be freed without syncing to the message buffer. Also convert the only user. Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- Documentation/i2c/DMA-considerations | 10 +++++++--- drivers/i2c/busses/i2c-sh_mobile.c | 2 +- drivers/i2c/i2c-core-base.c | 11 ++++++----- include/linux/i2c.h | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Documentation/i2c/DMA-considerations b/Documentation/i2c/DMA-considerations index 966610aa4620..203002054120 100644 --- a/Documentation/i2c/DMA-considerations +++ b/Documentation/i2c/DMA-considerations @@ -50,10 +50,14 @@ bounce buffer. But you don't need to care about that detail, just use the returned buffer. If NULL is returned, the threshold was not met or a bounce buffer could not be allocated. Fall back to PIO in that case. -In any case, a buffer obtained from above needs to be released. It ensures data -is copied back to the message and a potentially used bounce buffer is freed:: +In any case, a buffer obtained from above needs to be released. Another helper +function ensures a potentially used bounce buffer is freed:: - i2c_release_dma_safe_msg_buf(msg, dma_buf); + i2c_put_dma_safe_msg_buf(dma_buf, msg, xferred); + +The last argument 'xferred' controls if the buffer is synced back to the +message or not. No syncing is needed in cases setting up DMA had an error and +there was no data transferred. The bounce buffer handling from the core is generic and simple. It will always allocate a new bounce buffer. If you want a more sophisticated handling (e.g. diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 439e8778f849..279d0e5bd433 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -507,7 +507,7 @@ static void sh_mobile_i2c_dma_callback(void *data) pd->pos = pd->msg->len; pd->stop_after_dma = true; - i2c_release_dma_safe_msg_buf(pd->msg, pd->dma_buf); + i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, true); iic_set_clr(pd, ICIC, 0, ICIC_TDMAE | ICIC_RDMAE); } diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index f15737763608..9ee9a15e7134 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2293,21 +2293,22 @@ u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold) EXPORT_SYMBOL_GPL(i2c_get_dma_safe_msg_buf); /** - * i2c_release_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg - * @msg: the message to be synced with + * i2c_put_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg * @buf: the buffer obtained from i2c_get_dma_safe_msg_buf(). May be NULL. + * @msg: the message which the buffer corresponds to + * @xferred: bool saying if the message was transferred */ -void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf) +void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred) { if (!buf || buf == msg->buf) return; - if (msg->flags & I2C_M_RD) + if (xferred && msg->flags & I2C_M_RD) memcpy(msg->buf, buf, msg->len); kfree(buf); } -EXPORT_SYMBOL_GPL(i2c_release_dma_safe_msg_buf); +EXPORT_SYMBOL_GPL(i2c_put_dma_safe_msg_buf); MODULE_AUTHOR("Simon G. Vogl "); MODULE_DESCRIPTION("I2C-Bus main module"); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b79387fd57da..65b4eaed1d96 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -855,7 +855,7 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); -void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf); +void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); /** From 531db50170a3e6d113c968fe7a6dda8d55d02ede Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 24 Aug 2018 16:52:45 +0200 Subject: [PATCH 182/242] i2c: sh_mobile: define start_ch() void as it only returns 0 anyhow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After various refactoring over the years, start_ch() doesn't return errno anymore, so make the function return void. This saves the error handling when calling it which in turn eases cleanup of resources of a future patch. Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sh_mobile.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 279d0e5bd433..b965d52338ba 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -602,8 +602,8 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd) dma_async_issue_pending(chan); } -static int start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, - bool do_init) +static void start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, + bool do_init) { if (do_init) { /* Initialize channel registers */ @@ -627,7 +627,6 @@ static int start_ch(struct sh_mobile_i2c_data *pd, struct i2c_msg *usr_msg, /* Enable all interrupts to begin with */ iic_wr(pd, ICIC, ICIC_DTEE | ICIC_WAITE | ICIC_ALE | ICIC_TACKE); - return 0; } static int poll_dte(struct sh_mobile_i2c_data *pd) @@ -698,9 +697,7 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, pd->send_stop = i == num - 1 || msg->flags & I2C_M_STOP; pd->stop_after_dma = false; - err = start_ch(pd, msg, do_start); - if (err) - break; + start_ch(pd, msg, do_start); if (do_start) i2c_op(pd, OP_START, 0); From cebc07d84ad71bc58d6f59b770e4347da48a5a2b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 24 Aug 2018 16:52:46 +0200 Subject: [PATCH 183/242] i2c: sh_mobile: fix leak when using DMA bounce buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only freed the bounce buffer after successful DMA, missing the cases where DMA setup may have gone wrong. Use a better location which always gets called after each message and use 'stop_after_dma' as a flag for a successful transfer. Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sh_mobile.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index b965d52338ba..818cab14e87c 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -507,8 +507,6 @@ static void sh_mobile_i2c_dma_callback(void *data) pd->pos = pd->msg->len; pd->stop_after_dma = true; - i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, true); - iic_set_clr(pd, ICIC, 0, ICIC_TDMAE | ICIC_RDMAE); } @@ -706,6 +704,10 @@ static int sh_mobile_i2c_xfer(struct i2c_adapter *adapter, timeout = wait_event_timeout(pd->wait, pd->sr & (ICSR_TACK | SW_DONE), adapter->timeout); + + /* 'stop_after_dma' tells if DMA transfer was complete */ + i2c_put_dma_safe_msg_buf(pd->dma_buf, pd->msg, pd->stop_after_dma); + if (!timeout) { dev_err(pd->dev, "Transfer request timed out\n"); if (pd->dma_direction != DMA_NONE) From bded6c03e398dc6e862dc8301fb9a60175740653 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Tue, 21 Aug 2018 12:21:57 +0530 Subject: [PATCH 184/242] clk: x86: Set default parent to 48Mhz System clk provided in ST soc can be set to: 48Mhz, non-spread 25Mhz, spread To get accurate rate, we need it to set it at non-spread option which is 48Mhz. Signed-off-by: Akshu Agrawal Reviewed-by: Daniel Kurtz Fixes: 421bf6a1f061 ("clk: x86: Add ST oscout platform clock") Signed-off-by: Stephen Boyd --- drivers/clk/x86/clk-st.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/x86/clk-st.c b/drivers/clk/x86/clk-st.c index fb62f3938008..3a0996f2d556 100644 --- a/drivers/clk/x86/clk-st.c +++ b/drivers/clk/x86/clk-st.c @@ -46,7 +46,7 @@ static int st_clk_probe(struct platform_device *pdev) clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents), 0, st_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL); - clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_25M]->clk); + clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_48M]->clk); hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux", 0, st_data->base + MISCCLKCNTL1, OSCCLKENB, From 217c3e0196758662aa0429863b09d1c13da1c5d6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 31 Aug 2018 07:47:28 +1000 Subject: [PATCH 185/242] disable stringop truncation warnings for now They are too noisy Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 2b458801ba74..a34a9283ee90 100644 --- a/Makefile +++ b/Makefile @@ -807,6 +807,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) # disable pointer signed / unsigned warnings in gcc 4.0 KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) +# disable stringop warnings in gcc 8+ +KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) From 0986b16ab49b18063d29a9e02e9c7fab1928bc8e Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 24 Aug 2018 12:02:06 +1000 Subject: [PATCH 186/242] m68k/mac: Use correct PMU response format Now that the 68k Mac port has adopted the via-pmu driver, it must decode the PMU response accordingly otherwise the date and time will be wrong. Fixes: ebd722275f9cfc67 ("macintosh/via-pmu: Replace via-pmu68k driver with via-pmu driver") Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/mac/misc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 3534aa6a4dc2..1b083c500b9a 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -98,11 +98,10 @@ static time64_t pmu_read_time(void) if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) return 0; - while (!req.complete) - pmu_poll(); + pmu_wait_complete(&req); - time = (u32)((req.reply[1] << 24) | (req.reply[2] << 16) | - (req.reply[3] << 8) | req.reply[4]); + time = (u32)((req.reply[0] << 24) | (req.reply[1] << 16) | + (req.reply[2] << 8) | req.reply[3]); return time - RTC_OFFSET; } @@ -116,8 +115,7 @@ static void pmu_write_time(time64_t time) (data >> 24) & 0xFF, (data >> 16) & 0xFF, (data >> 8) & 0xFF, data & 0xFF) < 0) return; - while (!req.complete) - pmu_poll(); + pmu_wait_complete(&req); } static __u8 pmu_read_pram(int offset) From f52bb98f5aded4c43e52f5ce19fb83f7261e9e73 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 30 Aug 2018 16:05:32 +0100 Subject: [PATCH 187/242] arm64: mm: always enable CONFIG_HOLES_IN_ZONE Commit 6d526ee26ccd ("arm64: mm: enable CONFIG_HOLES_IN_ZONE for NUMA") only enabled HOLES_IN_ZONE for NUMA systems because the NUMA code was choking on the missing zone for nomap pages. This problem doesn't just apply to NUMA systems. If the architecture doesn't set HAVE_ARCH_PFN_VALID, pfn_valid() will return true if the pfn is part of a valid sparsemem section. When working with multiple pages, the mm code uses pfn_valid_within() to test each page it uses within the sparsemem section is valid. On most systems memory comes in MAX_ORDER_NR_PAGES chunks which all have valid/initialised struct pages. In this case pfn_valid_within() is optimised out. Systems where this isn't true (e.g. due to nomap) should set HOLES_IN_ZONE and provide HAVE_ARCH_PFN_VALID so that mm tests each page as it works with it. Currently non-NUMA arm64 systems can't enable HOLES_IN_ZONE, leading to a VM_BUG_ON(): | page:fffffdff802e1780 is uninitialized and poisoned | raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff | raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff | page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) | ------------[ cut here ]------------ | kernel BUG at include/linux/mm.h:978! | Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [...] | CPU: 1 PID: 25236 Comm: dd Not tainted 4.18.0 #7 | Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 | pstate: 40000085 (nZcv daIf -PAN -UAO) | pc : move_freepages_block+0x144/0x248 | lr : move_freepages_block+0x144/0x248 | sp : fffffe0071177680 [...] | Process dd (pid: 25236, stack limit = 0x0000000094cc07fb) | Call trace: | move_freepages_block+0x144/0x248 | steal_suitable_fallback+0x100/0x16c | get_page_from_freelist+0x440/0xb20 | __alloc_pages_nodemask+0xe8/0x838 | new_slab+0xd4/0x418 | ___slab_alloc.constprop.27+0x380/0x4a8 | __slab_alloc.isra.21.constprop.26+0x24/0x34 | kmem_cache_alloc+0xa8/0x180 | alloc_buffer_head+0x1c/0x90 | alloc_page_buffers+0x68/0xb0 | create_empty_buffers+0x20/0x1ec | create_page_buffers+0xb0/0xf0 | __block_write_begin_int+0xc4/0x564 | __block_write_begin+0x10/0x18 | block_write_begin+0x48/0xd0 | blkdev_write_begin+0x28/0x30 | generic_perform_write+0x98/0x16c | __generic_file_write_iter+0x138/0x168 | blkdev_write_iter+0x80/0xf0 | __vfs_write+0xe4/0x10c | vfs_write+0xb4/0x168 | ksys_write+0x44/0x88 | sys_write+0xc/0x14 | el0_svc_naked+0x30/0x34 | Code: aa1303e0 90001a01 91296421 94008902 (d4210000) | ---[ end trace 1601ba47f6e883fe ]--- Remove the NUMA dependency. Link: https://www.spinics.net/lists/arm-kernel/msg671851.html Cc: Cc: Ard Biesheuvel Reported-by: Mikulas Patocka Reviewed-by: Pavel Tatashin Tested-by: Mikulas Patocka Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 29e75b47becd..1b1a0e95c751 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -763,7 +763,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK config HOLES_IN_ZONE def_bool y - depends on NUMA source kernel/Kconfig.hz From 6fb86d97207880c1286cd4cb3a7e6a598afbc727 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Tue, 28 Aug 2018 12:24:54 +0530 Subject: [PATCH 188/242] cpu/hotplug: Remove skip_onerr field from cpuhp_step structure When notifiers were there, `skip_onerr` was used to avoid calling particular step startup/teardown callbacks in the CPU up/down rollback path, which made the hotplug asymmetric. As notifiers are gone now after the full state machine conversion, the `skip_onerr` field is no longer required. Remove it from the structure and its usage. Signed-off-by: Mukesh Ojha Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1535439294-31426-1-git-send-email-mojha@codeaurora.org --- kernel/cpu.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index ed44d7d34c2d..aa7fe85ad62e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -102,8 +102,6 @@ static inline void cpuhp_lock_release(bool bringup) { } * @name: Name of the step * @startup: Startup function of the step * @teardown: Teardown function of the step - * @skip_onerr: Do not invoke the functions on error rollback - * Will go away once the notifiers are gone * @cant_stop: Bringup/teardown can't be stopped at this step */ struct cpuhp_step { @@ -119,7 +117,6 @@ struct cpuhp_step { struct hlist_node *node); } teardown; struct hlist_head list; - bool skip_onerr; bool cant_stop; bool multi_instance; }; @@ -550,12 +547,8 @@ static int bringup_cpu(unsigned int cpu) static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) { - for (st->state--; st->state > st->target; st->state--) { - struct cpuhp_step *step = cpuhp_get_step(st->state); - - if (!step->skip_onerr) - cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); - } + for (st->state--; st->state > st->target; st->state--) + cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); } static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, @@ -644,12 +637,6 @@ static void cpuhp_thread_fun(unsigned int cpu) WARN_ON_ONCE(!cpuhp_is_ap_state(state)); - if (st->rollback) { - struct cpuhp_step *step = cpuhp_get_step(state); - if (step->skip_onerr) - goto next; - } - if (cpuhp_is_atomic_state(state)) { local_irq_disable(); st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); @@ -673,7 +660,6 @@ static void cpuhp_thread_fun(unsigned int cpu) st->should_run = false; } -next: cpuhp_lock_release(bringup); if (!st->should_run) @@ -916,12 +902,8 @@ void cpuhp_report_idle_dead(void) static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) { - for (st->state++; st->state < st->target; st->state++) { - struct cpuhp_step *step = cpuhp_get_step(st->state); - - if (!step->skip_onerr) - cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); - } + for (st->state++; st->state < st->target; st->state++) + cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); } static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, From 0413bedabc886c3a56804d1c80a58e99077b1d91 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 28 Aug 2018 15:10:48 -0500 Subject: [PATCH 189/242] of: Add device_type access helper functions In preparation to remove direct access to device_node.type, add of_node_is_type() and of_node_get_device_type() helpers to check and retrieve the device type. Cc: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/of.h b/include/linux/of.h index 688c52dd7b3e..99b0ebf49632 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -988,6 +988,18 @@ static inline struct device_node *of_find_matching_node( return of_find_matching_node_and_match(from, matches, NULL); } +static inline const char *of_node_get_device_type(const struct device_node *np) +{ + return of_get_property(np, "type", NULL); +} + +static inline bool of_node_is_type(const struct device_node *np, const char *type) +{ + const char *match = of_node_get_device_type(np); + + return np && match && type && !strcmp(match, type); +} + /** * of_property_count_u8_elems - Count the number of u8 elements in a property * From 342db04ae71273322f0011384a9ed414df8bdae4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 28 Aug 2018 17:49:01 +0200 Subject: [PATCH 190/242] x86/dumpstack: Don't dump kernel memory based on usermode RIP show_opcodes() is used both for dumping kernel instructions and for dumping user instructions. If userspace causes #PF by jumping to a kernel address, show_opcodes() can be reached with regs->ip controlled by the user, pointing to kernel code. Make sure that userspace can't trick us into dumping kernel memory into dmesg. Fixes: 7cccf0725cf7 ("x86/dumpstack: Add a show_ip() function") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: security@kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180828154901.112726-1-jannh@google.com --- arch/x86/include/asm/stacktrace.h | 2 +- arch/x86/kernel/dumpstack.c | 16 +++++++++++++--- arch/x86/mm/fault.c | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index b6dc698f992a..f335aad404a4 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void) return (unsigned long)frame; } -void show_opcodes(u8 *rip, const char *loglvl); +void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1596e6bfea6f..f56895106ccf 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -90,14 +90,24 @@ static void printk_stack_address(unsigned long address, int reliable, * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random * guesstimate in attempt to achieve all of the above. */ -void show_opcodes(u8 *rip, const char *loglvl) +void show_opcodes(struct pt_regs *regs, const char *loglvl) { #define PROLOGUE_SIZE 42 #define EPILOGUE_SIZE 21 #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE) u8 opcodes[OPCODE_BUFSIZE]; + unsigned long prologue = regs->ip - PROLOGUE_SIZE; + bool bad_ip; - if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) { + /* + * Make sure userspace isn't trying to trick us into dumping kernel + * memory by pointing the userspace instruction pointer at it. + */ + bad_ip = user_mode(regs) && + __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); + + if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue, + OPCODE_BUFSIZE)) { printk("%sCode: Bad RIP value.\n", loglvl); } else { printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" @@ -113,7 +123,7 @@ void show_ip(struct pt_regs *regs, const char *loglvl) #else printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip); #endif - show_opcodes((u8 *)regs->ip, loglvl); + show_opcodes(regs, loglvl); } void show_iret_regs(struct pt_regs *regs) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b9123c497e0a..47bebfe6efa7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -837,7 +837,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "\n"); - show_opcodes((u8 *)regs->ip, loglvl); + show_opcodes(regs, loglvl); } static void From 829fe4aa9ac16417a904ad1de1307de906854bcf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 29 Aug 2018 20:43:17 +0100 Subject: [PATCH 191/242] x86: Allow generating user-space headers without a compiler When bootstrapping an architecture, it's usual to generate the kernel's user-space headers (make headers_install) before building a compiler. Move the compiler check (for asm goto support) to the archprepare target so that it is only done when building code for the target. Fixes: e501ce957a78 ("x86: Force asm-goto") Reported-by: Helmut Grohne Signed-off-by: Ben Hutchings Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180829194317.GA4765@decadent.org.uk --- arch/x86/Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8fc8f94ef5f5..8f6e7eb8ae9f 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -175,10 +175,6 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER endif endif -ifndef CC_HAVE_ASM_GOTO - $(error Compiler lacks asm-goto support.) -endif - ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) # This compiler flag is not supported by Clang: KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) @@ -300,6 +296,13 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@ +archprepare: checkbin +checkbin: +ifndef CC_HAVE_ASM_GOTO + @echo Compiler lacks asm-goto support. + @exit 1 +endif + archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 From 4012e77a903d114f915fc607d6d2ed54a3d6c9b1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 29 Aug 2018 08:47:18 -0700 Subject: [PATCH 192/242] x86/nmi: Fix NMI uaccess race against CR3 switching A NMI can hit in the middle of context switching or in the middle of switch_mm_irqs_off(). In either case, CR3 might not match current->mm, which could cause copy_from_user_nmi() and friends to read the wrong memory. Fix it by adding a new nmi_uaccess_okay() helper and checking it in copy_from_user_nmi() and in __copy_from_user_nmi()'s callers. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Rik van Riel Cc: Nadav Amit Cc: Borislav Petkov Cc: Jann Horn Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/dd956eba16646fd0b15c3c0741269dfd84452dac.1535557289.git.luto@kernel.org --- arch/x86/events/core.c | 2 +- arch/x86/include/asm/tlbflush.h | 40 +++++++++++++++++++++++++++++++++ arch/x86/lib/usercopy.c | 5 +++++ arch/x86/mm/tlb.c | 7 ++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 5f4829f10129..dfb2f7c0d019 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs perf_callchain_store(entry, regs->ip); - if (!current->mm) + if (!nmi_uaccess_okay()) return; if (perf_callchain_user32(regs, entry)) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 29c9da6c62fc..58ce5288878e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -175,8 +175,16 @@ struct tlb_state { * are on. This means that it may not match current->active_mm, * which will contain the previous user mm when we're in lazy TLB * mode even if we've already switched back to swapper_pg_dir. + * + * During switch_mm_irqs_off(), loaded_mm will be set to + * LOADED_MM_SWITCHING during the brief interrupts-off window + * when CR3 and loaded_mm would otherwise be inconsistent. This + * is for nmi_uaccess_okay()'s benefit. */ struct mm_struct *loaded_mm; + +#define LOADED_MM_SWITCHING ((struct mm_struct *)1) + u16 loaded_mm_asid; u16 next_asid; /* last user mm's ctx id */ @@ -246,6 +254,38 @@ struct tlb_state { }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or + * switching the loaded mm. It can also be dangerous if we + * interrupted some kernel code that was temporarily using a + * different mm. + */ +static inline bool nmi_uaccess_okay(void) +{ + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); + struct mm_struct *current_mm = current->mm; + + VM_WARN_ON_ONCE(!loaded_mm); + + /* + * The condition we want to check is + * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, + * if we're running in a VM with shadow paging, and nmi_uaccess_okay() + * is supposed to be reasonably fast. + * + * Instead, we check the almost equivalent but somewhat conservative + * condition below, and we rely on the fact that switch_mm_irqs_off() + * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. + */ + if (loaded_mm != current_mm) + return false; + + VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); + + return true; +} + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index c8c6ad0d58b8..3f435d7fca5e 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include #include +#include + /* * We rely on the nested NMI work to allow atomic faults from the NMI path; the * nested NMI paths are careful to preserve CR2. @@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) if (__range_not_ok(from, n, TASK_SIZE)) return n; + if (!nmi_uaccess_okay()) + return n; + /* * Even though this function is typically called from NMI/IRQ context * disable pagefaults so that its behaviour is consistent even when diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 9517d1b2a281..e96b99eb800c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -305,6 +305,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); + /* Let nmi_uaccess_okay() know that we're changing CR3. */ + this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); + barrier(); + if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); @@ -335,6 +339,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (next != &init_mm) this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); + /* Make sure we write CR3 before loaded_mm. */ + barrier(); + this_cpu_write(cpu_tlbstate.loaded_mm, next); this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); } From eeb89e2bb1ac45b0836d4170e97a988c3a746c62 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 31 Aug 2018 10:05:38 +0200 Subject: [PATCH 193/242] x86/efi: Load fixmap GDT in efi_call_phys_epilog() When PTI is enabled on x86-32 the kernel uses the GDT mapped in the fixmap for the simple reason that this address is also mapped for user-space. The efi_call_phys_prolog()/efi_call_phys_epilog() wrappers change the GDT to call EFI runtime services and switch back to the kernel GDT when they return. But the switch-back uses the writable GDT, not the fixmap GDT. When that happened and and the CPU returns to user-space it switches to the user %cr3 and tries to restore user segment registers. This fails because the writable GDT is not mapped in the user page-table, and without a GDT the fault handlers also can't be launched. The result is a triple fault and reboot of the machine. Fix that by restoring the GDT back to the fixmap GDT which is also mapped in the user page-table. Fixes: 7757d607c6b3 x86/pti: ('Allow CONFIG_PAGE_TABLE_ISOLATION for x86_32') Reported-by: Guenter Roeck Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Cc: Ard Biesheuvel Cc: Michal Hocko Cc: Andi Kleen Cc: Linus Torvalds Cc: Dave Hansen Cc: Pavel Machek Cc: hpa@zytor.com Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/1535702738-10971-1-git-send-email-joro@8bytes.org --- arch/x86/platform/efi/efi_32.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 324b93328b37..05ca14222463 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -85,14 +85,10 @@ pgd_t * __init efi_call_phys_prolog(void) void __init efi_call_phys_epilog(pgd_t *save_pgd) { - struct desc_ptr gdt_descr; - - gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - load_cr3(save_pgd); __flush_tlb_all(); + + load_fixmap_gdt(0); } void __init efi_runtime_update_mappings(void) From 3a7ad0634f0986d807772ba74f66f7c3a73612e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Aug 2018 11:50:12 -0700 Subject: [PATCH 194/242] Revert "packet: switch kvzalloc to allocate memory" This reverts commit 71e41286203c017d24f041a7cd71abea7ca7b1e0. mmap()/munmap() can not be backed by kmalloced pages : We fault in : VM_BUG_ON_PAGE(PageSlab(page), page); unmap_single_vma+0x8a/0x110 unmap_vmas+0x4b/0x90 unmap_region+0xc9/0x140 do_munmap+0x274/0x360 vm_munmap+0x81/0xc0 SyS_munmap+0x2b/0x40 do_syscall_64+0x13e/0x1c0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 Fixes: 71e41286203c ("packet: switch kvzalloc to allocate memory") Signed-off-by: Eric Dumazet Reported-by: John Sperbeck Bisected-by: John Sperbeck Cc: Zhang Yu Cc: Li RongQing Signed-off-by: David S. Miller --- net/packet/af_packet.c | 44 +++++++++++++++++++++++++++++------------- net/packet/internal.h | 1 + 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5610061e7f2e..75c92a87e7b2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4137,36 +4137,52 @@ static const struct vm_operations_struct packet_mmap_ops = { .close = packet_mm_close, }; -static void free_pg_vec(struct pgv *pg_vec, unsigned int len) +static void free_pg_vec(struct pgv *pg_vec, unsigned int order, + unsigned int len) { int i; for (i = 0; i < len; i++) { if (likely(pg_vec[i].buffer)) { - kvfree(pg_vec[i].buffer); + if (is_vmalloc_addr(pg_vec[i].buffer)) + vfree(pg_vec[i].buffer); + else + free_pages((unsigned long)pg_vec[i].buffer, + order); pg_vec[i].buffer = NULL; } } kfree(pg_vec); } -static char *alloc_one_pg_vec_page(unsigned long size) +static char *alloc_one_pg_vec_page(unsigned long order) { char *buffer; + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | + __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; - buffer = kvzalloc(size, GFP_KERNEL); + buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; - buffer = kvzalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + /* __get_free_pages failed, fall back to vmalloc */ + buffer = vzalloc(array_size((1 << order), PAGE_SIZE)); + if (buffer) + return buffer; - return buffer; + /* vmalloc failed, lets dig into swap here */ + gfp_flags &= ~__GFP_NORETRY; + buffer = (char *) __get_free_pages(gfp_flags, order); + if (buffer) + return buffer; + + /* complete and utter failure */ + return NULL; } -static struct pgv *alloc_pg_vec(struct tpacket_req *req) +static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; - unsigned long size = req->tp_block_size; struct pgv *pg_vec; int i; @@ -4175,7 +4191,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req) goto out; for (i = 0; i < block_nr; i++) { - pg_vec[i].buffer = alloc_one_pg_vec_page(size); + pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } @@ -4184,7 +4200,7 @@ out: return pg_vec; out_free_pgvec: - free_pg_vec(pg_vec, block_nr); + free_pg_vec(pg_vec, order, block_nr); pg_vec = NULL; goto out; } @@ -4194,9 +4210,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); + int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; - int was_running; __be16 num; int err = -EINVAL; /* Added to avoid minimal code churn */ @@ -4258,7 +4274,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; err = -ENOMEM; - pg_vec = alloc_pg_vec(req); + order = get_order(req->tp_block_size); + pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; switch (po->tp_version) { @@ -4312,6 +4329,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frame_size = req->tp_frame_size; spin_unlock_bh(&rb_queue->lock); + swap(rb->pg_vec_order, order); swap(rb->pg_vec_len, req->tp_block_nr); rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; @@ -4337,7 +4355,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } if (pg_vec) - free_pg_vec(pg_vec, req->tp_block_nr); + free_pg_vec(pg_vec, order, req->tp_block_nr); out: return err; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 8f50036f62f0..3bb7c5fb3bff 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -64,6 +64,7 @@ struct packet_ring_buffer { unsigned int frame_size; unsigned int frame_max; + unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; From 9ad716b95fd6c6be46a4f2d5936e514b5bcd744d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Aug 2018 12:46:08 -0700 Subject: [PATCH 195/242] nfp: wait for posted reconfigs when disabling the device To avoid leaking a running timer we need to wait for the posted reconfigs after netdev is unregistered. In common case the process of deinitializing the device will perform synchronous reconfigs which wait for posted requests, but especially with VXLAN ports being actively added and removed there can be a race condition leaving a timer running after adapter structure is freed leading to a crash. Add an explicit flush after deregistering and for a good measure a warning to check if timer is running just before structures are freed. Fixes: 3d780b926a12 ("nfp: add async reconfiguration mechanism") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 66 ++++++++++++------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index a8b9fbab5f73..253bdaef1505 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -229,6 +229,45 @@ done: spin_unlock_bh(&nn->reconfig_lock); } +static void nfp_net_reconfig_sync_enter(struct nfp_net *nn) +{ + bool cancelled_timer = false; + u32 pre_posted_requests; + + spin_lock_bh(&nn->reconfig_lock); + + nn->reconfig_sync_present = true; + + if (nn->reconfig_timer_active) { + nn->reconfig_timer_active = false; + cancelled_timer = true; + } + pre_posted_requests = nn->reconfig_posted; + nn->reconfig_posted = 0; + + spin_unlock_bh(&nn->reconfig_lock); + + if (cancelled_timer) { + del_timer_sync(&nn->reconfig_timer); + nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires); + } + + /* Run the posted reconfigs which were issued before we started */ + if (pre_posted_requests) { + nfp_net_reconfig_start(nn, pre_posted_requests); + nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); + } +} + +static void nfp_net_reconfig_wait_posted(struct nfp_net *nn) +{ + nfp_net_reconfig_sync_enter(nn); + + spin_lock_bh(&nn->reconfig_lock); + nn->reconfig_sync_present = false; + spin_unlock_bh(&nn->reconfig_lock); +} + /** * nfp_net_reconfig() - Reconfigure the firmware * @nn: NFP Net device to reconfigure @@ -242,32 +281,9 @@ done: */ int nfp_net_reconfig(struct nfp_net *nn, u32 update) { - bool cancelled_timer = false; - u32 pre_posted_requests; int ret; - spin_lock_bh(&nn->reconfig_lock); - - nn->reconfig_sync_present = true; - - if (nn->reconfig_timer_active) { - del_timer(&nn->reconfig_timer); - nn->reconfig_timer_active = false; - cancelled_timer = true; - } - pre_posted_requests = nn->reconfig_posted; - nn->reconfig_posted = 0; - - spin_unlock_bh(&nn->reconfig_lock); - - if (cancelled_timer) - nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires); - - /* Run the posted reconfigs which were issued before we started */ - if (pre_posted_requests) { - nfp_net_reconfig_start(nn, pre_posted_requests); - nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); - } + nfp_net_reconfig_sync_enter(nn); nfp_net_reconfig_start(nn, update); ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT); @@ -3633,6 +3649,7 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev, */ void nfp_net_free(struct nfp_net *nn) { + WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); if (nn->dp.netdev) free_netdev(nn->dp.netdev); else @@ -3920,4 +3937,5 @@ void nfp_net_clean(struct nfp_net *nn) return; unregister_netdev(nn->dp.netdev); + nfp_net_reconfig_wait_posted(nn); } From e04e7a7bbd4bbabef4e1a58367e5fc9b2edc3b10 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 30 Aug 2018 05:42:13 +0000 Subject: [PATCH 196/242] hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe() This patch fixes the race between netvsc_probe() and rndis_set_subchannel(), which can cause a deadlock. These are the related 3 paths which show the deadlock: path #1: Workqueue: hv_vmbus_con vmbus_onmessage_work [hv_vmbus] Call Trace: schedule schedule_preempt_disabled __mutex_lock __device_attach bus_probe_device device_add vmbus_device_register vmbus_onoffer vmbus_onmessage_work process_one_work worker_thread kthread ret_from_fork path #2: schedule schedule_preempt_disabled __mutex_lock netvsc_probe vmbus_probe really_probe __driver_attach bus_for_each_dev driver_attach_async async_run_entry_fn process_one_work worker_thread kthread ret_from_fork path #3: Workqueue: events netvsc_subchan_work [hv_netvsc] Call Trace: schedule rndis_set_subchannel netvsc_subchan_work process_one_work worker_thread kthread ret_from_fork Before path #1 finishes, path #2 can start to run, because just before the "bus_probe_device(dev);" in device_add() in path #1, there is a line "object_uevent(&dev->kobj, KOBJ_ADD);", so systemd-udevd can immediately try to load hv_netvsc and hence path #2 can start to run. Next, path #2 offloads the subchannal's initialization to a workqueue, i.e. path #3, so we can end up in a deadlock situation like this: Path #2 gets the device lock, and is trying to get the rtnl lock; Path #3 gets the rtnl lock and is waiting for all the subchannel messages to be processed; Path #1 is trying to get the device lock, but since #2 is not releasing the device lock, path #1 has to sleep; since the VMBus messages are processed one by one, this means the sub-channel messages can't be procedded, so #3 has to sleep with the rtnl lock held, and finally #2 has to sleep... Now all the 3 paths are sleeping and we hit the deadlock. With the patch, we can make sure #2 gets both the device lock and the rtnl lock together, gets its job done, and releases the locks, so #1 and #3 will not be blocked for ever. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Signed-off-by: Dexuan Cui Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1121a1ec407c..70921bbe0e28 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2206,6 +2206,16 @@ static int netvsc_probe(struct hv_device *dev, memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + /* We must get rtnl lock before scheduling nvdev->subchan_work, + * otherwise netvsc_subchan_work() can get rtnl lock first and wait + * all subchannels to show up, but that may not happen because + * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer() + * -> ... -> device_add() -> ... -> __device_attach() can't get + * the device lock, so all the subchannels can't be processed -- + * finally netvsc_subchan_work() hangs for ever. + */ + rtnl_lock(); + if (nvdev->num_chn > 1) schedule_work(&nvdev->subchan_work); @@ -2224,7 +2234,6 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; - rtnl_lock(); ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); From b0e0b0abbd5e52568e3848b0ba54a9efa3a11547 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 30 Aug 2018 13:30:03 +0200 Subject: [PATCH 197/242] net/rds: RDS is not Radio Data System Getting prompt "The RDS Protocol" (RDS) is not too helpful, and it is easily confused with Radio Data System (which we may want to support in kernel, too). Signed-off-by: Pavel Machek Acked-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/Kconfig b/net/rds/Kconfig index 01b3bd6a3708..b9092111bc45 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -1,6 +1,6 @@ config RDS - tristate "The RDS Protocol" + tristate "The Reliable Datagram Sockets Protocol" depends on INET ---help--- The RDS (Reliable Datagram Sockets) protocol provides reliable, From 63cc357f7bba6729869565a12df08441a5995d9a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Aug 2018 14:24:29 +0200 Subject: [PATCH 198/242] tcp: do not restart timewait timer on rst reception RFC 1337 says: ''Ignore RST segments in TIME-WAIT state. If the 2 minute MSL is enforced, this fix avoids all three hazards.'' So with net.ipv4.tcp_rfc1337=1, expected behaviour is to have TIME-WAIT sk expire rather than removing it instantly when a reset is received. However, Linux will also re-start the TIME-WAIT timer. This causes connect to fail when tying to re-use ports or very long delays (until syn retry interval exceeds MSL). packetdrill test case: // Demonstrate bogus rearming of TIME-WAIT timer in rfc1337 mode. `sysctl net.ipv4.tcp_rfc1337=1` 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 0.000 bind(3, ..., ...) = 0 0.000 listen(3, 1) = 0 0.100 < S 0:0(0) win 29200 0.100 > S. 0:0(0) ack 1 0.200 < . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 // Receive first segment 0.310 < P. 1:1001(1000) ack 1 win 46 // Send one ACK 0.310 > . 1:1(0) ack 1001 // read 1000 byte 0.310 read(4, ..., 1000) = 1000 // Application writes 100 bytes 0.350 write(4, ..., 100) = 100 0.350 > P. 1:101(100) ack 1001 // ACK 0.500 < . 1001:1001(0) ack 101 win 257 // close the connection 0.600 close(4) = 0 0.600 > F. 101:101(0) ack 1001 win 244 // Our side is in FIN_WAIT_1 & waits for ack to fin 0.7 < . 1001:1001(0) ack 102 win 244 // Our side is in FIN_WAIT_2 with no outstanding data. 0.8 < F. 1001:1001(0) ack 102 win 244 0.8 > . 102:102(0) ack 1002 win 244 // Our side is now in TIME_WAIT state, send ack for fin. 0.9 < F. 1002:1002(0) ack 102 win 244 0.9 > . 102:102(0) ack 1002 win 244 // Peer reopens with in-window SYN: 1.000 < S 1000:1000(0) win 9200 // Therefore, reply with ACK. 1.000 > . 102:102(0) ack 1002 win 244 // Peer sends RST for this ACK. Normally this RST results // in tw socket removal, but rfc1337=1 setting prevents this. 1.100 < R 1002:1002(0) win 244 // second syn. Due to rfc1337=1 expect another pure ACK. 31.0 < S 1000:1000(0) win 9200 31.0 > . 102:102(0) ack 1002 win 244 // .. and another RST from peer. 31.1 < R 1002:1002(0) win 244 31.2 `echo no timer restart;ss -m -e -a -i -n -t -o state TIME-WAIT` // third syn after one minute. Time-Wait socket should have expired by now. 63.0 < S 1000:1000(0) win 9200 // so we expect a syn-ack & 3whs to proceed from here on. 63.0 > S. 0:0(0) ack 1 Without this patch, 'ss' shows restarts of tw timer and last packet is thus just another pure ack, more than one minute later. This restores the original code from commit 283fd6cf0be690a83 ("Merge in ANK networking jumbo patch") in netdev-vger-cvs.git . For some reason the else branch was removed/lost in 1f28b683339f7 ("Merge in TCP/UDP optimizations and [..]") and timer restart became unconditional. Reported-by: Michal Tesar Signed-off-by: Florian Westphal Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 75ef332a7caf..12affb7864d9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -184,8 +184,9 @@ kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; } + } else { + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); } - inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; From 902b5417f28d955cdb4898df6ffaab15f56c5cff Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 30 Aug 2018 16:01:17 +0200 Subject: [PATCH 199/242] selftests: pmtu: maximum MTU for vti4 is 2^16-1-20 Since commit 82612de1c98e ("ip_tunnel: restore binding to ifaces with a large mtu"), the maximum MTU for vti4 is based on IP_MAX_MTU instead of the mysterious constant 0xFFF8. This makes this selftest fail. Fixes: 82612de1c98e ("ip_tunnel: restore binding to ifaces with a large mtu") Signed-off-by: Sabrina Dubroca Acked-by: Stefano Brivio Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f8cc38afffa2..0ecf2609b9a4 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -334,7 +334,7 @@ test_pmtu_vti4_link_add_mtu() { fail=0 min=68 - max=$((65528 - 20)) + max=$((65535 - 20)) # Check invalid values first for v in $((min - 1)) $((max + 1)); do ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null From c81c7012e0c769b5704c2b07bd5224965e76fb70 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 30 Aug 2018 16:01:18 +0200 Subject: [PATCH 200/242] selftests: pmtu: detect correct binary to ping ipv6 addresses Some systems don't have the ping6 binary anymore, and use ping for everything. Detect the absence of ping6 and try to use ping instead. Fixes: d1f1b9cbf34c ("selftests: net: Introduce first PMTU test") Signed-off-by: Sabrina Dubroca Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 0ecf2609b9a4..32a194e3e07a 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -46,6 +46,9 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +# Some systems don't have a ping6 binary anymore +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + tests=" pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions @@ -274,7 +277,7 @@ test_pmtu_vti6_exception() { mtu "${ns_b}" veth_b 4000 mtu "${ns_a}" vti6_a 5000 mtu "${ns_b}" vti6_b 5000 - ${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null + ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null # Check that exception was created if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then From f611a5b4a51fa36a0aa792be474f5d6aacaef7e3 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 30 Aug 2018 13:19:53 -0500 Subject: [PATCH 201/242] ibmvnic: Include missing return code checks in reset function Check the return codes of these functions and halt reset in case of failure. The driver will remain in a dormant state until the next reset event, when device initialization will be re-attempted. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index dafdd4ade705..4f0daf67b18d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1823,11 +1823,17 @@ static int do_reset(struct ibmvnic_adapter *adapter, adapter->map_id = 1; release_rx_pools(adapter); release_tx_pools(adapter); - init_rx_pools(netdev); - init_tx_pools(netdev); + rc = init_rx_pools(netdev); + if (rc) + return rc; + rc = init_tx_pools(netdev); + if (rc) + return rc; release_napi(adapter); - init_napi(adapter); + rc = init_napi(adapter); + if (rc) + return rc; } else { rc = reset_tx_pools(adapter); if (rc) From c7486104a5ce7e8763e3cb5157bba8d0f1468d87 Mon Sep 17 00:00:00 2001 From: LuckTony Date: Fri, 31 Aug 2018 09:55:06 -0700 Subject: [PATCH 202/242] x86/mce: Fix set_mce_nospec() to avoid #GP fault The trick with flipping bit 63 to avoid loading the address of the 1:1 mapping of the poisoned page while the 1:1 map is updated used to work when unmapping the page. But it falls down horribly when attempting to directly set the page as uncacheable. The problem is that when the cache mode is changed to uncachable, the pages needs to be flushed from the cache first. But the decoy address is non-canonical due to bit 63 flipped, and the CLFLUSH instruction throws a #GP fault. Add code to change_page_attr_set_clr() to fix the address before calling flush. Fixes: 284ce4011ba6 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()") Suggested-by: Linus Torvalds Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Acked-by: Linus Torvalds Cc: Peter Anvin Cc: Borislav Petkov Cc: linux-edac Cc: Dan Williams Cc: Dave Jiang Link: https://lkml.kernel.org/r/20180831165506.GA9605@agluck-desk --- arch/x86/mm/pageattr.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8d6c34fe49be..51a5a69ecac9 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1420,6 +1420,29 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) return 0; } +/* + * Machine check recovery code needs to change cache mode of poisoned + * pages to UC to avoid speculative access logging another error. But + * passing the address of the 1:1 mapping to set_memory_uc() is a fine + * way to encourage a speculative access. So we cheat and flip the top + * bit of the address. This works fine for the code that updates the + * page tables. But at the end of the process we need to flush the cache + * and the non-canonical address causes a #GP fault when used by the + * CLFLUSH instruction. + * + * But in the common case we already have a canonical address. This code + * will fix the top bit if needed and is a no-op otherwise. + */ +static inline unsigned long make_addr_canonical_again(unsigned long addr) +{ +#ifdef CONFIG_X86_64 + return (long)(addr << 1) >> 1; +#else + return addr; +#endif +} + + static int change_page_attr_set_clr(unsigned long *addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, int force_split, int in_flag, @@ -1465,7 +1488,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, * Save address for cache flush. *addr is modified in the call * to __change_page_attr_set_clr() below. */ - baddr = *addr; + baddr = make_addr_canonical_again(*addr); } /* Must avoid aliasing mappings in the highmem code */ From c1d0af1a1d5dfde880f588eceb4c00710e0f60ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Aug 2018 08:07:52 +0200 Subject: [PATCH 203/242] kernel/dma/direct: take DMA offset into account in dma_direct_supported When a device has a DMA offset the dma capable result will change due to the difference between the physical and DMA address. Take that into account. Signed-off-by: Christoph Hellwig Reviewed-by: Benjamin Herrenschmidt Reviewed-by: Robin Murphy --- kernel/dma/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 1c35b7b945d0..de87b0282e74 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -168,7 +168,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, int dma_direct_supported(struct device *dev, u64 mask) { #ifdef CONFIG_ZONE_DMA - if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + if (mask < phys_to_dma(dev, DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))) return 0; #else /* @@ -177,7 +177,7 @@ int dma_direct_supported(struct device *dev, u64 mask) * memory, or by providing a ZONE_DMA32. If neither is the case, the * architecture needs to use an IOMMU instead of the direct mapping. */ - if (mask < DMA_BIT_MASK(32)) + if (mask < phys_to_dma(dev, DMA_BIT_MASK(32))) return 0; #endif /* From e78e5a91456fcecaa2efbb3706572fe043766f4d Mon Sep 17 00:00:00 2001 From: Samuel Neves Date: Sat, 1 Sep 2018 21:14:52 +0100 Subject: [PATCH 204/242] x86/vdso: Fix lsl operand order In the __getcpu function, lsl is using the wrong target and destination registers. Luckily, the compiler tends to choose %eax for both variables, so it has been working so far. Fixes: a582c540ac1b ("x86/vdso: Use RDPID in preference to LSL when available") Signed-off-by: Samuel Neves Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180901201452.27828-1-sneves@dei.uc.pt --- arch/x86/include/asm/vgtod.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index fb856c9f0449..53748541c487 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void) * * If RDPID is available, use it. */ - alternative_io ("lsl %[p],%[seg]", + alternative_io ("lsl %[seg],%[p]", ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ X86_FEATURE_RDPID, [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); From 93bbadd6e0a2a58e49d265b9b1aa58e621b60a26 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 30 Aug 2018 19:11:24 +0300 Subject: [PATCH 205/242] ipv6: don't get lwtstate twice in ip6_rt_copy_init() Commit 80f1a0f4e0cd ("net/ipv6: Put lwtstate when destroying fib6_info") partially fixed the kmemleak [1], lwtstate can be copied from fib6_info, with ip6_rt_copy_init(), and it should be done only once there. rt->dst.lwtstate is set by ip6_rt_init_dst(), at the start of the function ip6_rt_copy_init(), so there is no need to get it again at the end. With this patch, lwtstate also isn't copied from RTF_REJECT routes. [1]: unreferenced object 0xffff880b6aaa14e0 (size 64): comm "ip", pid 10577, jiffies 4295149341 (age 1273.903s) hex dump (first 32 bytes): 01 00 04 00 04 00 00 00 10 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000018664623>] lwtunnel_build_state+0x1bc/0x420 [<00000000b73aa29a>] ip6_route_info_create+0x9f7/0x1fd0 [<00000000ee2c5d1f>] ip6_route_add+0x14/0x70 [<000000008537b55c>] inet6_rtm_newroute+0xd9/0xe0 [<000000002acc50f5>] rtnetlink_rcv_msg+0x66f/0x8e0 [<000000008d9cd381>] netlink_rcv_skb+0x268/0x3b0 [<000000004c893c76>] netlink_unicast+0x417/0x5a0 [<00000000f2ab1afb>] netlink_sendmsg+0x70b/0xc30 [<00000000890ff0aa>] sock_sendmsg+0xb1/0xf0 [<00000000a2e7b66f>] ___sys_sendmsg+0x659/0x950 [<000000001e7426c8>] __sys_sendmsg+0xde/0x170 [<00000000fe411443>] do_syscall_64+0x9f/0x4a0 [<000000001be7b28b>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000006d21f353>] 0xffffffffffffffff Fixes: 6edb3c96a5f0 ("net/ipv6: Defer initialization of dst to data path") Signed-off-by: Alexey Kodanev Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c4ea13e8360b..18e00ce1719a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -996,7 +996,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) rt->rt6i_src = ort->fib6_src; #endif rt->rt6i_prefsrc = ort->fib6_prefsrc; - rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); } static struct fib6_node* fib6_backtrack(struct fib6_node *fn, From 5a7faef72eb9d51487feac467c8c68afa459534c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Aug 2018 11:25:51 +0200 Subject: [PATCH 206/242] sparc: set a default 32-bit dma mask for OF devices This keeps the historic default behavior for devices without a DMA mask, but removes the warning about a lacking DMA mask for doing DMA without a mask. Reported-by: Meelis Roos Signed-off-by: Christoph Hellwig Tested-by: Guenter Roeck --- arch/sparc/kernel/of_device_32.c | 4 ++++ arch/sparc/kernel/of_device_64.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 3641a294ed54..e4abe9b8f97a 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -381,6 +382,9 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, else dev_set_name(&op->dev, "%08x", dp->phandle); + op->dev.coherent_dma_mask = DMA_BIT_MASK(32); + op->dev.dma_mask = &op->dev.coherent_dma_mask; + if (of_device_register(op)) { printk("%s: Could not register of device.\n", dp->full_name); diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 44e4d4435bed..6df6086968c6 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -675,6 +676,8 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, dev_set_name(&op->dev, "root"); else dev_set_name(&op->dev, "%08x", dp->phandle); + op->dev.coherent_dma_mask = DMA_BIT_MASK(32); + op->dev.dma_mask = &op->dev.coherent_dma_mask; if (of_device_register(op)) { printk("%s: Could not register of device.\n", From 8c89ef7b6b64ba093239305f77a485905d03f7bf Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 31 Aug 2018 16:13:07 +0200 Subject: [PATCH 207/242] of/platform: initialise AMBA default DMA masks This addresses a v4.19-rc1 regression in the PL111 DRM driver in drivers/gpu/pl111/* The driver uses the CMA KMS helpers and will thus at some point call down to dma_alloc_attrs() to allocate a chunk of contigous DMA memory for the framebuffer. It appears that in v4.18, it was OK that this (and other DMA mastering AMBA devices) left dev->coherent_dma_mask blank (zero). In v4.19-rc1 the WARN_ON_ONCE(dev && !dev->coherent_dma_mask) in dma_alloc_attrs() in include/linux/dma-mapping.h is triggered. The allocation later fails when get_coherent_dma_mask() is called from __dma_alloc() and __dma_alloc() returns NULL: drm-clcd-pl111 dev:20: coherent DMA mask is unset drm-clcd-pl111 dev:20: [drm:drm_fb_helper_fbdev_setup] *ERROR* Failed to set fbdev configuration It turns out that in commit 4d8bde883bfb ("OF: Don't set default coherent DMA mask") the OF core stops setting the default DMA mask on new devices, especially those lines of the patch: - if (!dev->coherent_dma_mask) - dev->coherent_dma_mask = DMA_BIT_MASK(32); Robin Murphy solved a similar problem in a5516219b102 ("of/platform: Initialise default DMA masks") by simply assigning dev.coherent_dma_mask and the dev.dma_mask to point to the same when creating devices from the device tree, and introducing the same code into the code path creating AMBA/PrimeCell devices solved my problem, graphics now come up. The code simply assumes that the device can access all of the system memory by setting the coherent DMA mask to 0xffffffff when creating a device from the device tree, which is crude, but seems to be what kernel v4.18 assumed. The AMBA PrimeCells do not differ between coherent and streaming DMA so we can just assign the same to any DMA mask. Possibly drivers should augment their coherent DMA mask in accordance with "dma-ranges" from the device tree if more finegranular masking is needed. Reported-by: Russell King Fixes: 4d8bde883bfb ("OF: Don't set default coherent DMA mask") Cc: Russell King Cc: Robin Murphy Signed-off-by: Linus Walleij Signed-off-by: Christoph Hellwig --- drivers/of/platform.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 7ba90c290a42..6c59673933e9 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -241,6 +241,10 @@ static struct amba_device *of_amba_device_create(struct device_node *node, if (!dev) goto err_clear_flag; + /* AMBA devices only support a single DMA mask */ + dev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + dev->dev.dma_mask = &dev->dev.coherent_dma_mask; + /* setup generic device info */ dev->dev.of_node = of_node_get(node); dev->dev.fwnode = &node->fwnode; From ff924c5a1ec7548825cc2d07980b03be4224ffac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 1 Sep 2018 21:01:28 -0700 Subject: [PATCH 208/242] x86/pti: Fix section mismatch warning/error Fix the section mismatch warning in arch/x86/mm/pti.c: WARNING: vmlinux.o(.text+0x6972a): Section mismatch in reference from the function pti_clone_pgtable() to the function .init.text:pti_user_pagetable_walk_pte() The function pti_clone_pgtable() references the function __init pti_user_pagetable_walk_pte(). This is often because pti_clone_pgtable lacks a __init annotation or the annotation of pti_user_pagetable_walk_pte is wrong. FATAL: modpost: Section mismatches detected. Fixes: 85900ea51577 ("x86/pti: Map the vsyscall page if needed") Reported-by: kbuild test robot Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/43a6d6a3-d69d-5eda-da09-0b1c88215a2a@infradead.org --- arch/x86/mm/pti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 31341ae7309f..c1fc1ae6b429 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -248,7 +248,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; From 9db39f4d4f94b61e4b64b077f6ddb2bdfb533a88 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 31 Aug 2018 23:45:16 +0200 Subject: [PATCH 209/242] bpf: Fix bpf_msg_pull_data() Helper bpf_msg_pull_data() mistakenly reuses variable 'offset' while linearizing multiple scatterlist elements. Variable 'offset' is used to find first starting scatterlist element i.e. msg->data = sg_virt(&sg[first_sg]) + start - offset" Use different variable name while linearizing multiple scatterlist elements so that value contained in variable 'offset' won't get overwritten. Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data") Signed-off-by: Tushar Dave Signed-off-by: Daniel Borkmann --- net/core/filter.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 2c7801f6737a..aecdeba052d3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2292,7 +2292,7 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { BPF_CALL_4(bpf_msg_pull_data, struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) { - unsigned int len = 0, offset = 0, copy = 0; + unsigned int len = 0, offset = 0, copy = 0, poffset = 0; int bytes = end - start, bytes_sg_total; struct scatterlist *sg = msg->sg_data; int first_sg, last_sg, i, shift; @@ -2348,16 +2348,15 @@ BPF_CALL_4(bpf_msg_pull_data, if (unlikely(!page)) return -ENOMEM; p = page_address(page); - offset = 0; i = first_sg; do { from = sg_virt(&sg[i]); len = sg[i].length; - to = p + offset; + to = p + poffset; memcpy(to, from, len); - offset += len; + poffset += len; sg[i].length = 0; put_page(sg_page(&sg[i])); From 97911e0ccb54c7478b9dac77e6c54c01b449e3a7 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Fri, 31 Aug 2018 15:32:42 +0900 Subject: [PATCH 210/242] tools/bpf: bpftool, add xskmap in map types When listed all maps, bpftool currently shows (null) for xskmap. Added xskmap type in map_type_name[] to show correct type. Signed-off-by: Prashant Bhole Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b2ec20e562bd..b455930a3eaf 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -68,6 +68,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_DEVMAP] = "devmap", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", [BPF_MAP_TYPE_SOCKHASH] = "sockhash", [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", }; From 597222f72a94118f593e4f32bf58ae7e049a0df1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 30 Aug 2018 21:25:02 -0700 Subject: [PATCH 211/242] bpf: avoid misuse of psock when TCP_ULP_BPF collides with another ULP Currently we check sk_user_data is non NULL to determine if the sk exists in a map. However, this is not sufficient to ensure the psock or the ULP ops are not in use by another user, such as kcm or TLS. To avoid this when adding a sock to a map also verify it is of the correct ULP type. Additionally, when releasing a psock verify that it is the TCP_ULP_BPF type before releasing the ULP. The error case where we abort an update due to ULP collision can cause this error path. For example, __sock_map_ctx_update_elem() [...] err = tcp_set_ulp_id(sock, TCP_ULP_BPF) <- collides with TLS if (err) <- so err out here goto out_free [...] out_free: smap_release_sock() <- calling tcp_cleanup_ulp releases the TLS ULP incorrectly. Fixes: 2f857d04601a ("bpf: sockmap, remove STRPARSER map_flags and add multi-map support") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index ce63e5801746..488ef9663c01 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1462,10 +1462,16 @@ static void smap_destroy_psock(struct rcu_head *rcu) schedule_work(&psock->gc_work); } +static bool psock_is_smap_sk(struct sock *sk) +{ + return inet_csk(sk)->icsk_ulp_ops == &bpf_tcp_ulp_ops; +} + static void smap_release_sock(struct smap_psock *psock, struct sock *sock) { if (refcount_dec_and_test(&psock->refcnt)) { - tcp_cleanup_ulp(sock); + if (psock_is_smap_sk(sock)) + tcp_cleanup_ulp(sock); write_lock_bh(&sock->sk_callback_lock); smap_stop_sock(psock, sock); write_unlock_bh(&sock->sk_callback_lock); @@ -1892,6 +1898,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, * doesn't update user data. */ if (psock) { + if (!psock_is_smap_sk(sock)) { + err = -EBUSY; + goto out_progs; + } if (READ_ONCE(psock->bpf_parse) && parse) { err = -EBUSY; goto out_progs; From 4fb7253e4f9a8f06a986a3b317e2f79d9b43d552 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 Aug 2018 18:06:08 +0800 Subject: [PATCH 212/242] igmp: fix incorrect unsolicit report count when join group We should not start timer if im->unsolicit_count equal to 0 after decrease. Or we will send one more unsolicit report message. i.e. 3 instead of 2 by default. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index cf75f8944b05..deb1f8274d71 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -820,10 +820,9 @@ static void igmp_timer_expire(struct timer_list *t) spin_lock(&im->lock); im->tm_running = 0; - if (im->unsolicit_count) { - im->unsolicit_count--; + if (im->unsolicit_count && --im->unsolicit_count) igmp_start_timer(im, unsolicited_report_interval(in_dev)); - } + im->reporter = 1; spin_unlock(&im->lock); From ff06525fcb8ae3c302ac1319bf6c07c026dea964 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 Aug 2018 18:06:10 +0800 Subject: [PATCH 213/242] igmp: fix incorrect unsolicit report count after link down and up After link down and up, i.e. when call ip_mc_up(), we doesn't init im->unsolicit_count. So after igmp_timer_expire(), we will not start timer again and only send one unsolicit report at last. Fix it by initializing im->unsolicit_count in igmp_group_added(), so we can respect igmp robustness value. Fixes: 24803f38a5c0b ("igmp: do not remove igmp souce list info when set link down") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index deb1f8274d71..4da39446da2d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1307,6 +1307,8 @@ static void igmp_group_added(struct ip_mc_list *im) if (in_dev->dead) return; + + im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1390,9 +1392,6 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode) { struct ip_mc_list *im; -#ifdef CONFIG_IP_MULTICAST - struct net *net = dev_net(in_dev->dev); -#endif ASSERT_RTNL(); @@ -1419,7 +1418,6 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST timer_setup(&im->timer, igmp_timer_expire, 0); - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; From 10d7fac4c52618d94a42d701d28f114147291ecc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 29 Aug 2018 08:00:23 -0700 Subject: [PATCH 214/242] dt-bindings: net: cpsw: Document cpsw-phy-sel usage but prefer phandle The current cpsw usage for cpsw-phy-sel is undocumented but is used for all the boards using cpsw. And cpsw-phy-sel is not really a child of the cpsw device, it lives in the system control module instead. Let's document the existing usage, and improve it a bit where we prefer to use a phandle instead of a child device for it. That way we can properly describe the hardware in dts files for things like genpd. Cc: devicetree@vger.kernel.org Cc: Andrew Lunn Cc: Grygorii Strashko Cc: Ivan Khoronzhuk Cc: Mark Rutland Cc: Murali Karicheri Cc: Rob Herring Signed-off-by: Tony Lindgren Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 41089369f891..b3acebe08eb0 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -19,6 +19,10 @@ Required properties: - slaves : Specifies number for slaves - active_slave : Specifies the slave to use for time stamping, ethtool and SIOCGMIIPHY +- cpsw-phy-sel : Specifies the phandle to the CPSW phy mode selection + device. See also cpsw-phy-sel.txt for it's binding. + Note that in legacy cases cpsw-phy-sel may be + a child device instead of a phandle. Optional properties: - ti,hwmods : Must be "cpgmac0" @@ -75,6 +79,7 @@ Examples: cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; syscon = <&cm>; + cpsw-phy-sel = <&phy_sel>; cpsw_emac0: slave@0 { phy_id = <&davinci_mdio>, <0>; phy-mode = "rgmii-txid"; @@ -103,6 +108,7 @@ Examples: cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; syscon = <&cm>; + cpsw-phy-sel = <&phy_sel>; cpsw_emac0: slave@0 { phy_id = <&davinci_mdio>, <0>; phy-mode = "rgmii-txid"; From 18eb8aea7fb2fb4490e578b1b8a1096c34b2fc48 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 29 Aug 2018 08:00:24 -0700 Subject: [PATCH 215/242] net: ethernet: cpsw-phy-sel: prefer phandle for phy sel The cpsw-phy-sel device is not a child of the cpsw interconnect target module. It lives in the system control module. Let's fix this issue by trying to use cpsw-phy-sel phandle first if it exists and if not fall back to current usage of trying to find the cpsw-phy-sel child. That way the phy sel driver can be a child of the system control module where it belongs in the device tree. Without this fix, we cannot have a proper interconnect target module hierarchy in device tree for things like genpd. Note that deferred probe is mostly not supported by cpsw and this patch does not attempt to fix that. In case deferred probe support is needed, this could be added to cpsw_slave_open() and phy_connect() so they start handling and returning errors. For documenting it, looks like the cpsw-phy-sel is used for all cpsw device tree nodes. It's missing the related binding documentation, so let's also update the binding documentation accordingly. Cc: devicetree@vger.kernel.org Cc: Andrew Lunn Cc: Grygorii Strashko Cc: Ivan Khoronzhuk Cc: Mark Rutland Cc: Murali Karicheri Cc: Rob Herring Signed-off-by: Tony Lindgren Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-phy-sel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 0c1adad7415d..396e1cd10667 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -170,10 +170,13 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave) struct device_node *node; struct cpsw_phy_sel_priv *priv; - node = of_get_child_by_name(dev->of_node, "cpsw-phy-sel"); + node = of_parse_phandle(dev->of_node, "cpsw-phy-sel", 0); if (!node) { - dev_err(dev, "Phy mode driver DT not found\n"); - return; + node = of_get_child_by_name(dev->of_node, "cpsw-phy-sel"); + if (!node) { + dev_err(dev, "Phy mode driver DT not found\n"); + return; + } } dev = bus_find_device(&platform_bus_type, NULL, node, match); From 15a81b418e22a9aa4a0504471fdcb0f4ebf69b96 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Aug 2018 14:15:43 -0700 Subject: [PATCH 216/242] net/ipv6: Only update MTU metric if it set Jan reported a regression after an update to 4.18.5. In this case ipv6 default route is setup by systemd-networkd based on data from an RA. The RA contains an MTU of 1492 which is used when the route is first inserted but then systemd-networkd pushes down updates to the default route without the mtu set. Prior to the change to fib6_info, metrics such as MTU were held in the dst_entry and rt6i_pmtu in rt6_info contained an update to the mtu if any. ip6_mtu would look at rt6i_pmtu first and use it if set. If not, the value from the metrics is used if it is set and finally falling back to the idev value. After the fib6_info change metrics are contained in the fib6_info struct and there is no equivalent to rt6i_pmtu. To maintain consistency with the old behavior the new code should only reset the MTU in the metrics if the route update has it set. Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info") Reported-by: Jan Janssen Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c861a6d4671d..5516f55e214b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -989,7 +989,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_clean_expires(iter); else fib6_set_expires(iter, rt->expires); - fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); + + if (rt->fib6_pmtu) + fib6_metric_set(iter, RTAX_MTU, + rt->fib6_pmtu); return -EEXIST; } /* If we have the same destination and the same metric, From 57361846b52bc686112da6ca5368d11210796804 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Sep 2018 14:37:30 -0700 Subject: [PATCH 217/242] Linux 4.19-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a34a9283ee90..19948e556941 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Merciless Moray # *DOCUMENTATION* From 16fe10cf92783ed9ceb182d6ea2b8adf5e8ec1b8 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 1 Sep 2018 20:11:05 +0800 Subject: [PATCH 218/242] net: cadence: Fix a sleep-in-atomic-context bug in macb_halt_tx() The kernel module may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] usleep_range drivers/net/ethernet/cadence/macb_main.c, 648: usleep_range in macb_halt_tx drivers/net/ethernet/cadence/macb_main.c, 730: macb_halt_tx in macb_tx_error_task drivers/net/ethernet/cadence/macb_main.c, 721: _raw_spin_lock_irqsave in macb_tx_error_task To fix this bug, usleep_range() is replaced with udelay(). This bug is found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c6707ea2d751..16e4ef7d7185 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -649,7 +649,7 @@ static int macb_halt_tx(struct macb *bp) if (!(status & MACB_BIT(TGO))) return 0; - usleep_range(10, 250); + udelay(250); } while (time_before(halt_time, timeout)); return -ETIMEDOUT; From 59a03fea131d671a57b8ed3dc446264c61d4b75f Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 1 Sep 2018 21:20:27 +0000 Subject: [PATCH 219/242] uapi: Fix linux/rds.h userspace compilation errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include linux/in6.h for struct in6_addr. /usr/include/linux/rds.h:156:18: error: field ‘laddr’ has incomplete type struct in6_addr laddr; ^~~~~ /usr/include/linux/rds.h:157:18: error: field ‘faddr’ has incomplete type struct in6_addr faddr; ^~~~~ /usr/include/linux/rds.h:178:18: error: field ‘laddr’ has incomplete type struct in6_addr laddr; ^~~~~ /usr/include/linux/rds.h:179:18: error: field ‘faddr’ has incomplete type struct in6_addr faddr; ^~~~~ /usr/include/linux/rds.h:198:18: error: field ‘bound_addr’ has incomplete type struct in6_addr bound_addr; ^~~~~~~~~~ /usr/include/linux/rds.h:199:18: error: field ‘connected_addr’ has incomplete type struct in6_addr connected_addr; ^~~~~~~~~~~~~~ /usr/include/linux/rds.h:219:18: error: field ‘local_addr’ has incomplete type struct in6_addr local_addr; ^~~~~~~~~~ /usr/include/linux/rds.h:221:18: error: field ‘peer_addr’ has incomplete type struct in6_addr peer_addr; ^~~~~~~~~ /usr/include/linux/rds.h:245:18: error: field ‘src_addr’ has incomplete type struct in6_addr src_addr; ^~~~~~~~ /usr/include/linux/rds.h:246:18: error: field ‘dst_addr’ has incomplete type struct in6_addr dst_addr; ^~~~~~~~ Fixes: b7ff8b1036f0 ("rds: Extend RDS API for IPv6 support") Signed-off-by: Vinson Lee Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index dc520e1a4123..8b73cb603c5f 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -37,6 +37,7 @@ #include #include /* For __kernel_sockaddr_storage. */ +#include /* For struct in6_addr. */ #define RDS_IB_ABI_VERSION 0x301 From 66eb02d839e8495ae6b612e2d09ff599374b80e2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 31 Aug 2018 01:04:13 +0200 Subject: [PATCH 220/242] mac80211: fix an off-by-one issue in A-MSDU max_subframe computation Initialize 'n' to 2 in order to take into account also the first packet in the estimation of max_subframe limit for a given A-MSDU since frag_tail pointer is NULL when ieee80211_amsdu_aggregate routine analyzes the second frame. Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") Signed-off-by: Lorenzo Bianconi Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6ca0865de945..9b3b069e418a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3174,7 +3174,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, void *data; bool ret = false; unsigned int orig_len; - int n = 1, nfrags, pad = 0; + int n = 2, nfrags, pad = 0; u16 hdrlen; if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) From 8442938c3a2177ba16043b3a935f2c78266ad399 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Aug 2018 11:10:55 +0300 Subject: [PATCH 221/242] cfg80211: fix a type issue in ieee80211_chandef_to_operating_class() The "chandef->center_freq1" variable is a u32 but "freq" is a u16 so we are truncating away the high bits. I noticed this bug because in commit 9cf0a0b4b64a ("cfg80211: Add support for 60GHz band channels 5 and 6") we made "freq <= 56160 + 2160 * 6" a valid requency when before it was only "freq <= 56160 + 2160 * 4" that was valid. It introduces a static checker warning: net/wireless/util.c:1571 ieee80211_chandef_to_operating_class() warn: always true condition '(freq <= 56160 + 2160 * 6) => (0-u16max <= 69120)' But really we probably shouldn't have been truncating the high bits away to begin with. Signed-off-by: Dan Carpenter Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 3c654cd7ba56..908bf5b6d89e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1374,7 +1374,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, u8 *op_class) { u8 vht_opclass; - u16 freq = chandef->center_freq1; + u32 freq = chandef->center_freq1; if (freq >= 2412 && freq <= 2472) { if (chandef->width > NL80211_CHAN_WIDTH_40) From abd76d255d69d70206c01b9cb19ba36a9c1df6a1 Mon Sep 17 00:00:00 2001 From: "Dreyfuss, Haim" Date: Fri, 31 Aug 2018 11:31:04 +0300 Subject: [PATCH 222/242] mac80211: fix WMM TXOP calculation In commit 9236c4523e5b ("mac80211: limit wmm params to comply with ETSI requirements"), we have limited the WMM parameters to comply with 802.11 and ETSI standard. Mistakenly the TXOP value was caluclated wrong. Fix it by taking the minimum between 802.11 to ETSI to make sure we are not violating both. Fixes: e552af058148 ("mac80211: limit wmm params to comply with ETSI requirements") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c80187d6e6bb..93b5bb849ad7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1151,8 +1151,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min); qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max); qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn); - qparam->txop = !qparam->txop ? wmm_ac->cot / 32 : - min_t(u16, qparam->txop, wmm_ac->cot / 32); + qparam->txop = min_t(u16, qparam->txop, wmm_ac->cot / 32); rcu_read_unlock(); } From f3ffb6c3a28963657eb8b02a795d75f2ebbd5ef4 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:06 +0300 Subject: [PATCH 223/242] mac80211: fix a race between restart and CSA flows We hit a problem with iwlwifi that was caused by a bug in mac80211. A bug in iwlwifi caused the firwmare to crash in certain cases in channel switch. Because of that bug, drv_pre_channel_switch would fail and trigger the restart flow. Now we had the hw restart worker which runs on the system's workqueue and the csa_connection_drop_work worker that runs on mac80211's workqueue that can run together. This is obviously problematic since the restart work wants to reconfigure the connection, while the csa_connection_drop_work worker does the exact opposite: it tries to disconnect. Fix this by cancelling the csa_connection_drop_work worker in the restart worker. Note that this can sound racy: we could have: driver iface_work CSA_work restart_work +++++++++++++++++++++++++++++++++++++++++++++ | <--drv_cs ---| -CS FAILED--> | | | cancel_work(CSA) schedule | CSA work | | | Race between those 2 But this is not possible because we flush the workqueue in the restart worker before we cancel the CSA worker. That would be bullet proof if we could guarantee that we schedule the CSA worker only from the iface_work which runs on the workqueue (and not on the system's workqueue), but unfortunately we do have an instance in which we schedule the CSA work outside the context of the workqueue (ieee80211_chswitch_done). Note also that we should probably cancel other workers like beacon_connection_loss_work and possibly others for different types of interfaces, at the very least, IBSS should suffer from the exact same problem, but for now, do the minimum to fix the actual bug that was actually experienced and reproduced. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/main.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 27cd64acaf00..66cbddd65b47 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -255,8 +255,27 @@ static void ieee80211_restart_work(struct work_struct *work) flush_work(&local->radar_detected_work); rtnl_lock(); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { + /* + * XXX: there may be more work for other vif types and even + * for station mode: a good thing would be to run most of + * the iface type's dependent _stop (ieee80211_mg_stop, + * ieee80211_ibss_stop) etc... + * For now, fix only the specific bug that was seen: race + * between csa_connection_drop_work and us. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * This worker is scheduled from the iface worker that + * runs on mac80211's workqueue, so we can't be + * scheduling this worker after the cancel right here. + * The exception is ieee80211_chswitch_done. + * Then we can have a race... + */ + cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work); + } flush_delayed_work(&sdata->dec_tailroom_needed_wk); + } ieee80211_scan_cancel(local); /* make sure any new ROC will consider local->in_reconfig */ From 0007e94355fdb71a1cf5dba0754155cba08f0666 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 31 Aug 2018 11:31:10 +0300 Subject: [PATCH 224/242] mac80211: Fix station bandwidth setting after channel switch When performing a channel switch flow for a managed interface, the flow did not update the bandwidth of the AP station and the rate scale algorithm. In case of a channel width downgrade, this would result with the rate scale algorithm using a bandwidth that does not match the interface channel configuration. Fix this by updating the AP station bandwidth and rate scaling algorithm before the actual channel change in case of a bandwidth downgrade, or after the actual channel change in case of a bandwidth upgrade. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a59187c016e0..22b699460176 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -978,6 +978,10 @@ static void ieee80211_chswitch_work(struct work_struct *work) */ if (sdata->reserved_chanctx) { + struct ieee80211_supported_band *sband = NULL; + struct sta_info *mgd_sta = NULL; + enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20; + /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their @@ -986,6 +990,48 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (sdata->reserved_ready) goto out; + if (sdata->vif.bss_conf.chandef.width != + sdata->csa_chandef.width) { + /* + * For managed interface, we need to also update the AP + * station bandwidth and align the rate scale algorithm + * on the bandwidth change. Here we only consider the + * bandwidth of the new channel definition (as channel + * switch flow does not have the full HT/VHT/HE + * information), assuming that if additional changes are + * required they would be done as part of the processing + * of the next beacon from the AP. + */ + switch (sdata->csa_chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + default: + bw = IEEE80211_STA_RX_BW_20; + break; + case NL80211_CHAN_WIDTH_40: + bw = IEEE80211_STA_RX_BW_40; + break; + case NL80211_CHAN_WIDTH_80: + bw = IEEE80211_STA_RX_BW_80; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + bw = IEEE80211_STA_RX_BW_160; + break; + } + + mgd_sta = sta_info_get(sdata, ifmgd->bssid); + sband = + local->hw.wiphy->bands[sdata->csa_chandef.chan->band]; + } + + if (sdata->vif.bss_conf.chandef.width > + sdata->csa_chandef.width) { + mgd_sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, mgd_sta, + IEEE80211_RC_BW_CHANGED); + } + ret = ieee80211_vif_use_reserved_context(sdata); if (ret) { sdata_info(sdata, @@ -996,6 +1042,13 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; } + if (sdata->vif.bss_conf.chandef.width < + sdata->csa_chandef.width) { + mgd_sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, mgd_sta, + IEEE80211_RC_BW_CHANGED); + } + goto out; } From 6c18b27d6e5c6a7206364eae2b47bc8d8b2fa68f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:12 +0300 Subject: [PATCH 225/242] mac80211: don't Tx a deauth frame if the AP forbade Tx If the driver fails to properly prepare for the channel switch, mac80211 will disconnect. If the CSA IE had mode set to 1, it means that the clients are not allowed to send any Tx on the current channel, and that includes the deauthentication frame. Make sure that we don't send the deauthentication frame in this case. In iwlwifi, this caused a failure to flush queues since the firmware already closed the queues after having parsed the CSA IE. Then mac80211 would wait until the deauthentication frame would go out (drv_flush(drop=false)) and that would never happen. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 22b699460176..b046bf95eb3c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1270,6 +1270,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, cbss->beacon_interval)); return; drop_connection: + /* + * This is just so that the disconnect flow will know that + * we were trying to switch channel and failed. In case the + * mode is 1 (we are not allowed to Tx), we will know not to + * send a deauthentication frame. Those two fields will be + * reset when the disconnection worker runs. + */ + sdata->vif.csa_active = true; + sdata->csa_block_tx = csa_ie.mode; + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); mutex_unlock(&local->mtx); @@ -2453,6 +2463,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + bool tx; sdata_lock(sdata); if (!ifmgd->associated) { @@ -2460,6 +2471,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) return; } + tx = !sdata->csa_block_tx; + /* AP is probably out of range (or not reachable for another reason) so * remove the bss struct for that AP. */ @@ -2467,7 +2480,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, - true, frame_buf); + tx, frame_buf); mutex_lock(&local->mtx); sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; @@ -2478,7 +2491,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) } mutex_unlock(&local->mtx); - ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, + ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); sdata_unlock(sdata); From c6e57b3896fc76299913b8cfd82d853bee8a2c84 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 31 Aug 2018 11:31:13 +0300 Subject: [PATCH 226/242] mac80211: shorten the IBSS debug messages When tracing is enabled, all the debug messages are recorded and must not exceed MAX_MSG_LEN (100) columns. Longer debug messages grant the user with: WARNING: CPU: 3 PID: 32642 at /tmp/wifi-core-20180806094828/src/iwlwifi-stack-dev/net/mac80211/./trace_msg.h:32 trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211] Workqueue: phy1 ieee80211_iface_work [mac80211] RIP: 0010:trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211] Call Trace: __sdata_dbg+0xbd/0x120 [mac80211] ieee80211_ibss_rx_queued_mgmt+0x15f/0x510 [mac80211] ieee80211_iface_work+0x21d/0x320 [mac80211] Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6449a1c2283b..f0f5fedb8caa 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -947,8 +947,8 @@ static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, if (len < IEEE80211_DEAUTH_FRAME_LEN) return; - ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, reason); + ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason); sta_info_destroy_addr(sdata, mgmt->sa); } @@ -966,9 +966,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); - ibss_dbg(sdata, - "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); + ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n", + mgmt->bssid, auth_transaction); if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; @@ -1175,10 +1175,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } - ibss_dbg(sdata, - "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n", mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, + (unsigned long long)rx_timestamp); + ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n", (unsigned long long)beacon_timestamp, (unsigned long long)(rx_timestamp - beacon_timestamp), jiffies); @@ -1537,9 +1537,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); - ibss_dbg(sdata, - "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon); + ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da); + ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n", + mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; From 36feaac35405e932ad9c321d7a2db8a7e4a4d7ca Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 31 Aug 2018 16:52:01 +0800 Subject: [PATCH 227/242] ip6_tunnel: respect ttl inherit for ip6tnl man ip-tunnel ttl section says: 0 is a special value meaning that packets inherit the TTL value. IPv4 tunnel respect this in ip_tunnel_xmit(), but IPv6 tunnel has not implement it yet. To make IPv6 behave consistently with IP tunnel, add ipv6 tunnel inherit support. Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5df2a58d945c..419960b0ba16 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1188,7 +1188,15 @@ route_lookup: init_tel_txopt(&opt, encap_limit); ipv6_push_frag_opts(skb, &opt.ops, &proto); } - hop_limit = hop_limit ? : ip6_dst_hoplimit(dst); + + if (hop_limit == 0) { + if (skb->protocol == htons(ETH_P_IP)) + hop_limit = ip_hdr(skb)->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + hop_limit = ipv6_hdr(skb)->hop_limit; + else + hop_limit = ip6_dst_hoplimit(dst); + } /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. From 9fd0e09a4e86499639653243edfcb417a05c5c46 Mon Sep 17 00:00:00 2001 From: Anthony Wong Date: Fri, 31 Aug 2018 20:06:42 +0800 Subject: [PATCH 228/242] r8169: add support for NCube 8168 network card This card identifies itself as: Ethernet controller [0200]: NCube Device [10ff:8168] (rev 06) Subsystem: TP-LINK Technologies Co., Ltd. Device [7470:3468] Adding a new entry to rtl8169_pci_tbl makes the card work. Link: http://launchpad.net/bugs/1788730 Signed-off-by: Anthony Wong Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 1 + include/linux/pci_ids.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ac306797590e..b08d51bf7a20 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -218,6 +218,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, + { PCI_DEVICE(PCI_VENDOR_ID_NCUBE, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, { PCI_VENDOR_ID_DLINK, 0x4300, PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0, RTL_CFG_1 }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 99d366cb0e9f..d157983b84cf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3084,4 +3084,6 @@ #define PCI_VENDOR_ID_OCZ 0x1b85 +#define PCI_VENDOR_ID_NCUBE 0x10ff + #endif /* _LINUX_PCI_IDS_H */ From c48300c92ad9f029f4dcbcf5d71ad880e3acf2fa Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Mon, 3 Sep 2018 20:59:13 +0300 Subject: [PATCH 229/242] vhost: fix VHOST_GET_BACKEND_FEATURES ioctl request definition The _IOC_READ flag fits this ioctl request more because this request actually only writes to, but doesn't read from userspace. See NOTEs in include/uapi/asm-generic/ioctl.h for more information. Fixes: 429711aec282 ("vhost: switch to use new message format") Signed-off-by: Gleb Fotengauer-Malinovskiy Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/vhost.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index b1e22c40c4b6..84c3de89696a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -176,7 +176,7 @@ struct vhost_memory { #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) -#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64) +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) /* VHOST_NET specific defines */ From c10bbfae3ae43fae1d77e16f05a73474acf514ff Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 3 Sep 2018 10:04:55 +0300 Subject: [PATCH 230/242] net: sched: null actions array pointer before releasing action Currently, tcf_action_delete() nulls actions array pointer after putting and deleting it. However, if tcf_idr_delete_index() returns an error, pointer to action is not set to null. That results it being released second time in error handling code of tca_action_gd(). Kasan error: [ 807.367755] ================================================================== [ 807.375844] BUG: KASAN: use-after-free in tc_setup_cb_call+0x14e/0x250 [ 807.382763] Read of size 8 at addr ffff88033e636000 by task tc/2732 [ 807.391289] CPU: 0 PID: 2732 Comm: tc Tainted: G W 4.19.0-rc1+ #799 [ 807.399542] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 807.407948] Call Trace: [ 807.410763] dump_stack+0x92/0xeb [ 807.414456] print_address_description+0x70/0x360 [ 807.419549] kasan_report+0x14d/0x300 [ 807.423582] ? tc_setup_cb_call+0x14e/0x250 [ 807.428150] tc_setup_cb_call+0x14e/0x250 [ 807.432539] ? nla_put+0x65/0xe0 [ 807.436146] fl_dump+0x394/0x3f0 [cls_flower] [ 807.440890] ? fl_tmplt_dump+0x140/0x140 [cls_flower] [ 807.446327] ? lock_downgrade+0x320/0x320 [ 807.450702] ? lock_acquire+0xe2/0x220 [ 807.454819] ? is_bpf_text_address+0x5/0x140 [ 807.459475] ? memcpy+0x34/0x50 [ 807.462980] ? nla_put+0x65/0xe0 [ 807.466582] tcf_fill_node+0x341/0x430 [ 807.470717] ? tcf_block_put+0xe0/0xe0 [ 807.474859] tcf_node_dump+0xdb/0xf0 [ 807.478821] fl_walk+0x8e/0x170 [cls_flower] [ 807.483474] tcf_chain_dump+0x35a/0x4d0 [ 807.487703] ? tfilter_notify+0x170/0x170 [ 807.492091] ? tcf_fill_node+0x430/0x430 [ 807.496411] tc_dump_tfilter+0x362/0x3f0 [ 807.500712] ? tc_del_tfilter+0x850/0x850 [ 807.505104] ? kasan_unpoison_shadow+0x30/0x40 [ 807.509940] ? __mutex_unlock_slowpath+0xcf/0x410 [ 807.515031] netlink_dump+0x263/0x4f0 [ 807.519077] __netlink_dump_start+0x2a0/0x300 [ 807.523817] ? tc_del_tfilter+0x850/0x850 [ 807.528198] rtnetlink_rcv_msg+0x46a/0x6d0 [ 807.532671] ? rtnl_fdb_del+0x3f0/0x3f0 [ 807.536878] ? tc_del_tfilter+0x850/0x850 [ 807.541280] netlink_rcv_skb+0x18d/0x200 [ 807.545570] ? rtnl_fdb_del+0x3f0/0x3f0 [ 807.549773] ? netlink_ack+0x500/0x500 [ 807.553913] netlink_unicast+0x2d0/0x370 [ 807.558212] ? netlink_attachskb+0x340/0x340 [ 807.562855] ? _copy_from_iter_full+0xe9/0x3e0 [ 807.567677] ? import_iovec+0x11e/0x1c0 [ 807.571890] netlink_sendmsg+0x3b9/0x6a0 [ 807.576192] ? netlink_unicast+0x370/0x370 [ 807.580684] ? netlink_unicast+0x370/0x370 [ 807.585154] sock_sendmsg+0x6b/0x80 [ 807.589015] ___sys_sendmsg+0x4a1/0x520 [ 807.593230] ? copy_msghdr_from_user+0x210/0x210 [ 807.598232] ? do_wp_page+0x174/0x880 [ 807.602276] ? __handle_mm_fault+0x749/0x1c10 [ 807.607021] ? __handle_mm_fault+0x1046/0x1c10 [ 807.611849] ? __pmd_alloc+0x320/0x320 [ 807.615973] ? check_chain_key+0x140/0x1f0 [ 807.620450] ? check_chain_key+0x140/0x1f0 [ 807.624929] ? __fget_light+0xbc/0xd0 [ 807.628970] ? __sys_sendmsg+0xd7/0x150 [ 807.633172] __sys_sendmsg+0xd7/0x150 [ 807.637201] ? __ia32_sys_shutdown+0x30/0x30 [ 807.641846] ? up_read+0x53/0x90 [ 807.645442] ? __do_page_fault+0x484/0x780 [ 807.649949] ? do_syscall_64+0x1e/0x2c0 [ 807.654164] do_syscall_64+0x72/0x2c0 [ 807.658198] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 807.663625] RIP: 0033:0x7f42e9870150 [ 807.667568] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24 [ 807.687328] RSP: 002b:00007ffdbf595b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 807.695564] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f42e9870150 [ 807.703083] RDX: 0000000000000000 RSI: 00007ffdbf595b80 RDI: 0000000000000003 [ 807.710605] RBP: 00007ffdbf599d90 R08: 0000000000679bc0 R09: 000000000000000f [ 807.718127] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdbf599d88 [ 807.725651] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 807.735048] Allocated by task 2687: [ 807.738902] kasan_kmalloc+0xa0/0xd0 [ 807.742852] __kmalloc+0x118/0x2d0 [ 807.746615] tcf_idr_create+0x44/0x320 [ 807.750738] tcf_nat_init+0x41e/0x530 [act_nat] [ 807.755638] tcf_action_init_1+0x4e0/0x650 [ 807.760104] tcf_action_init+0x1ce/0x2d0 [ 807.764395] tcf_exts_validate+0x1d8/0x200 [ 807.768861] fl_change+0x55a/0x26b4 [cls_flower] [ 807.773845] tc_new_tfilter+0x748/0xa20 [ 807.778051] rtnetlink_rcv_msg+0x56a/0x6d0 [ 807.782517] netlink_rcv_skb+0x18d/0x200 [ 807.786804] netlink_unicast+0x2d0/0x370 [ 807.791095] netlink_sendmsg+0x3b9/0x6a0 [ 807.795387] sock_sendmsg+0x6b/0x80 [ 807.799240] ___sys_sendmsg+0x4a1/0x520 [ 807.803445] __sys_sendmsg+0xd7/0x150 [ 807.807473] do_syscall_64+0x72/0x2c0 [ 807.811506] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 807.818776] Freed by task 2728: [ 807.822283] __kasan_slab_free+0x122/0x180 [ 807.826752] kfree+0xf4/0x2f0 [ 807.830080] __tcf_action_put+0x5a/0xb0 [ 807.834281] tcf_action_put_many+0x46/0x70 [ 807.838747] tca_action_gd+0x232/0xc40 [ 807.842862] tc_ctl_action+0x215/0x230 [ 807.846977] rtnetlink_rcv_msg+0x56a/0x6d0 [ 807.851444] netlink_rcv_skb+0x18d/0x200 [ 807.855731] netlink_unicast+0x2d0/0x370 [ 807.860021] netlink_sendmsg+0x3b9/0x6a0 [ 807.864312] sock_sendmsg+0x6b/0x80 [ 807.868166] ___sys_sendmsg+0x4a1/0x520 [ 807.872372] __sys_sendmsg+0xd7/0x150 [ 807.876401] do_syscall_64+0x72/0x2c0 [ 807.880431] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 807.887704] The buggy address belongs to the object at ffff88033e636000 which belongs to the cache kmalloc-256 of size 256 [ 807.900909] The buggy address is located 0 bytes inside of 256-byte region [ffff88033e636000, ffff88033e636100) [ 807.913155] The buggy address belongs to the page: [ 807.918322] page:ffffea000cf98d80 count:1 mapcount:0 mapping:ffff88036f80ee00 index:0x0 compound_mapcount: 0 [ 807.928831] flags: 0x5fff8000008100(slab|head) [ 807.933647] raw: 005fff8000008100 ffffea000db44f00 0000000400000004 ffff88036f80ee00 [ 807.942050] raw: 0000000000000000 0000000080190019 00000001ffffffff 0000000000000000 [ 807.950456] page dumped because: kasan: bad access detected [ 807.958240] Memory state around the buggy address: [ 807.963405] ffff88033e635f00: fc fc fc fc fb fb fb fb fb fb fb fc fc fc fc fb [ 807.971288] ffff88033e635f80: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc [ 807.979166] >ffff88033e636000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 807.994882] ^ [ 807.998477] ffff88033e636080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 808.006352] ffff88033e636100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb [ 808.014230] ================================================================== [ 808.022108] Disabling lock debugging due to kernel taint Fixes: edfaf94fa705 ("net_sched: improve and refactor tcf_action_put_many()") Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 316c98bb87e4..e12f8ef7baa4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1179,6 +1179,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) struct tcf_idrinfo *idrinfo = a->idrinfo; u32 act_index = a->tcfa_index; + actions[i] = NULL; if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); @@ -1190,7 +1191,6 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) if (ret < 0) return ret; } - actions[i] = NULL; } return 0; } From bf68066fccb10fce6bbffdda24ee2ae314c9c5b2 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Mon, 3 Sep 2018 10:26:28 +0300 Subject: [PATCH 231/242] net/ibm/emac: wrong emac_calc_base call was used by typo __emac_calc_base_mr1 was used instead of __emac4_calc_base_mr1 by copy-paste mistake for emac4syn. Fixes: 45d6e545505fd32edb812f085be7de45b6a5c0af ("net/ibm/emac: add 8192 rx/tx fifo size") Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 354c0982847b..372664686309 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -494,9 +494,6 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s case 16384: ret |= EMAC_MR1_RFS_16K; break; - case 8192: - ret |= EMAC4_MR1_RFS_8K; - break; case 4096: ret |= EMAC_MR1_RFS_4K; break; @@ -537,6 +534,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ case 16384: ret |= EMAC4_MR1_RFS_16K; break; + case 8192: + ret |= EMAC4_MR1_RFS_8K; + break; case 4096: ret |= EMAC4_MR1_RFS_4K; break; From af8a2b8ba7678b4695f9e854ba9abae1076beabe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 3 Sep 2018 15:47:10 +0800 Subject: [PATCH 232/242] sctp: fix invalid reference to the index variable of the iterator Now in sctp_apply_peer_addr_params(), if SPP_IPV6_FLOWLABEL flag is set and trans is NULL, it would use trans as the index variable to traverse transport_addr_list, then trans is set as the last transport of it. Later, if SPP_DSCP flag is set, it would enter into the wrong branch as trans is actually an invalid reference. So fix it by using a new index variable to traverse transport_addr_list for both SPP_DSCP and SPP_IPV6_FLOWLABEL flags process. Fixes: 0b0dce7a36fb ("sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams") Reported-by: Julia Lawall Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aa76586a1a1c..a0ccfa4b8220 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2663,14 +2663,15 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, SCTP_FLOWLABEL_VAL_MASK; trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK; } else if (asoc) { - list_for_each_entry(trans, - &asoc->peer.transport_addr_list, + struct sctp_transport *t; + + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - if (trans->ipaddr.sa.sa_family != AF_INET6) + if (t->ipaddr.sa.sa_family != AF_INET6) continue; - trans->flowlabel = params->spp_ipv6_flowlabel & - SCTP_FLOWLABEL_VAL_MASK; - trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK; + t->flowlabel = params->spp_ipv6_flowlabel & + SCTP_FLOWLABEL_VAL_MASK; + t->flowlabel |= SCTP_FLOWLABEL_SET_MASK; } asoc->flowlabel = params->spp_ipv6_flowlabel & SCTP_FLOWLABEL_VAL_MASK; @@ -2687,12 +2688,13 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK; trans->dscp |= SCTP_DSCP_SET_MASK; } else if (asoc) { - list_for_each_entry(trans, - &asoc->peer.transport_addr_list, + struct sctp_transport *t; + + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { - trans->dscp = params->spp_dscp & - SCTP_DSCP_VAL_MASK; - trans->dscp |= SCTP_DSCP_SET_MASK; + t->dscp = params->spp_dscp & + SCTP_DSCP_VAL_MASK; + t->dscp |= SCTP_DSCP_SET_MASK; } asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK; asoc->dscp |= SCTP_DSCP_SET_MASK; From 741880e1f2f59b20125dc480765d2546cec66080 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 3 Sep 2018 15:47:11 +0800 Subject: [PATCH 233/242] sctp: not traverse asoc trans list if non-ipv6 trans exists for ipv6_flowlabel When users set params.spp_address and get a trans, ipv6_flowlabel flag should be applied into this trans. But even if this one is not an ipv6 trans, it should not go to apply it into all other transes of the asoc but simply ignore it. Fixes: 0b0dce7a36fb ("sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a0ccfa4b8220..f73e9d38d5ba 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2658,10 +2658,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, } if (params->spp_flags & SPP_IPV6_FLOWLABEL) { - if (trans && trans->ipaddr.sa.sa_family == AF_INET6) { - trans->flowlabel = params->spp_ipv6_flowlabel & - SCTP_FLOWLABEL_VAL_MASK; - trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK; + if (trans) { + if (trans->ipaddr.sa.sa_family == AF_INET6) { + trans->flowlabel = params->spp_ipv6_flowlabel & + SCTP_FLOWLABEL_VAL_MASK; + trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK; + } } else if (asoc) { struct sctp_transport *t; From 6b95c3e9697254dab0c8eafc6ab9d5e10d2eca4e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 3 Sep 2018 04:23:17 -0400 Subject: [PATCH 234/242] bnxt_en: Fix firmware signaled resource change logic in open. When the driver detects that resources have changed during open, it should reset the rx and tx rings to 0. This will properly setup the init sequence to initialize the default rings again. We also need to signal the RDMA driver to stop and clear its interrupts. We then call the RoCE driver to restart if a new set of default rings is successfully reserved. Fixes: 25e1acd6b92b ("bnxt_en: Notify firmware about IF state changes.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8bb1e38b1681..6a1baf375ac3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6684,6 +6684,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) hw_resc->resv_rx_rings = 0; hw_resc->resv_hw_ring_grps = 0; hw_resc->resv_vnics = 0; + bp->tx_nr_rings = 0; + bp->rx_nr_rings = 0; } return rc; } @@ -8769,20 +8771,25 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (bp->tx_nr_rings) return 0; + bnxt_ulp_irq_stop(bp); + bnxt_clear_int_mode(bp); rc = bnxt_set_dflt_rings(bp, true); if (rc) { netdev_err(bp->dev, "Not enough rings available.\n"); - return rc; + goto init_dflt_ring_err; } rc = bnxt_init_int_mode(bp); if (rc) - return rc; + goto init_dflt_ring_err; + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) { bp->flags |= BNXT_FLAG_RFS; bp->dev->features |= NETIF_F_NTUPLE; } - return 0; +init_dflt_ring_err: + bnxt_ulp_irq_restart(bp, rc); + return rc; } int bnxt_restore_pf_fw_resources(struct bnxt *bp) From ad95c27bdb930105f3eea02621bda157caf2862d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 3 Sep 2018 04:23:18 -0400 Subject: [PATCH 235/242] bnxt_en: Clean up unused functions. Remove unused bnxt_subtract_ulp_resources(). Change bnxt_get_max_func_irqs() to static since it is only locally used. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 - drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 15 --------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 1 - 4 files changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6a1baf375ac3..6472ce447f87 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5918,7 +5918,7 @@ void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max) bp->hw_resc.max_cp_rings = max; } -unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) +static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index fefa011320e0..c4c77b9fa77b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1482,7 +1482,6 @@ unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max); -unsigned int bnxt_get_max_func_irqs(struct bnxt *bp); int bnxt_get_avail_msix(struct bnxt *bp, int num); int bnxt_reserve_rings(struct bnxt *bp); void bnxt_tx_disable(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index c37b2842f972..deac73e8e0f7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -220,21 +220,6 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp) return 0; } -void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id) -{ - ASSERT_RTNL(); - if (bnxt_ulp_registered(bp->edev, ulp_id)) { - struct bnxt_en_dev *edev = bp->edev; - unsigned int msix_req, max; - - msix_req = edev->ulp_tbl[ulp_id].msix_requested; - max = bnxt_get_max_func_cp_rings(bp); - bnxt_set_max_func_cp_rings(bp, max - msix_req); - max = bnxt_get_max_func_stat_ctxs(bp); - bnxt_set_max_func_stat_ctxs(bp, max - 1); - } -} - static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id, struct bnxt_fw_msg *fw_msg) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index df48ac71729f..d9bea37cd211 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -90,7 +90,6 @@ static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev, int ulp_id) int bnxt_get_ulp_msix_num(struct bnxt *bp); int bnxt_get_ulp_msix_base(struct bnxt *bp); -void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id); void bnxt_ulp_stop(struct bnxt *bp); void bnxt_ulp_start(struct bnxt *bp); void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs); From 00fe9c326d2027f2437dea38ef0e82f9d02d94c0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 3 Sep 2018 04:23:19 -0400 Subject: [PATCH 236/242] bnxt_en: Do not adjust max_cp_rings by the ones used by RDMA. Currently, the driver adjusts the bp->hw_resc.max_cp_rings by the number of MSIX vectors used by RDMA. There is one code path in open that needs to check the true max_cp_rings including any used by RDMA. This code is now checking for the reduced max_cp_rings which will fail when the number of cp rings is very small. To fix this in a clean way, we don't adjust max_cp_rings anymore. Instead, we add a helper bnxt_get_max_func_cp_rings_for_en() to get the reduced max_cp_rings when appropriate. Fixes: ec86f14ea506 ("bnxt_en: Add ULP calls to stop and restart IRQs.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 7 ++++--- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 5 ----- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6472ce447f87..cecbb1d1f587 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5913,9 +5913,9 @@ unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp) return bp->hw_resc.max_cp_rings; } -void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max) +unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp) { - bp->hw_resc.max_cp_rings = max; + return bp->hw_resc.max_cp_rings - bnxt_get_ulp_msix_num(bp); } static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) @@ -8631,7 +8631,8 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, *max_tx = hw_resc->max_tx_rings; *max_rx = hw_resc->max_rx_rings; - *max_cp = min_t(int, hw_resc->max_irqs, hw_resc->max_cp_rings); + *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp), + hw_resc->max_irqs); *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs); max_ring_grps = hw_resc->max_hw_ring_grps; if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index c4c77b9fa77b..bde384630a75 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1481,7 +1481,7 @@ int bnxt_hwrm_set_coal(struct bnxt *); unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); -void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max); +unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp); int bnxt_get_avail_msix(struct bnxt *bp, int num); int bnxt_reserve_rings(struct bnxt *bp); void bnxt_tx_disable(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 6d583bcd2a81..fcd085a9853a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -451,7 +451,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESOURCE_CFG, -1, -1); - vf_cp_rings = hw_resc->max_cp_rings - bp->cp_nr_rings; + vf_cp_rings = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings; vf_stat_ctx = hw_resc->max_stat_ctxs - bp->num_stat_ctxs; if (bp->flags & BNXT_FLAG_AGG_RINGS) vf_rx_rings = hw_resc->max_rx_rings - bp->rx_nr_rings * 2; @@ -549,7 +549,8 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs) max_stat_ctxs = hw_resc->max_stat_ctxs; /* Remaining rings are distributed equally amongs VF's for now */ - vf_cp_rings = (hw_resc->max_cp_rings - bp->cp_nr_rings) / num_vfs; + vf_cp_rings = (bnxt_get_max_func_cp_rings_for_en(bp) - + bp->cp_nr_rings) / num_vfs; vf_stat_ctx = (max_stat_ctxs - bp->num_stat_ctxs) / num_vfs; if (bp->flags & BNXT_FLAG_AGG_RINGS) vf_rx_rings = (hw_resc->max_rx_rings - bp->rx_nr_rings * 2) / @@ -643,7 +644,7 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) */ vfs_supported = *num_vfs; - avail_cp = hw_resc->max_cp_rings - bp->cp_nr_rings; + avail_cp = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings; avail_stat = hw_resc->max_stat_ctxs - bp->num_stat_ctxs; avail_cp = min_t(int, avail_cp, avail_stat); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index deac73e8e0f7..beee61292d5e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -169,7 +169,6 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id, edev->ulp_tbl[ulp_id].msix_requested = avail_msix; } bnxt_fill_msix_vecs(bp, ent); - bnxt_set_max_func_cp_rings(bp, max_cp_rings - avail_msix); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; return avail_msix; } @@ -178,7 +177,6 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id) { struct net_device *dev = edev->net; struct bnxt *bp = netdev_priv(dev); - int max_cp_rings, msix_requested; ASSERT_RTNL(); if (ulp_id != BNXT_ROCE_ULP) @@ -187,9 +185,6 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id) if (!(edev->flags & BNXT_EN_FLAG_MSIX_REQUESTED)) return 0; - max_cp_rings = bnxt_get_max_func_cp_rings(bp); - msix_requested = edev->ulp_tbl[ulp_id].msix_requested; - bnxt_set_max_func_cp_rings(bp, max_cp_rings + msix_requested); edev->ulp_tbl[ulp_id].msix_requested = 0; edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; if (netif_running(dev)) { From 9cc1bf3928b31e515ed15477b3c7eb653d0b3b42 Mon Sep 17 00:00:00 2001 From: Zhenbo Gao Date: Mon, 3 Sep 2018 16:36:45 +0800 Subject: [PATCH 237/242] tipc: correct spelling errors for struct tipc_bc_base's comment Trivial fix for two spelling mistakes. Signed-off-by: Zhenbo Gao Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9ee6cfea56dd..d8026543bf4c 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -51,12 +51,12 @@ const char tipc_bclink_name[] = "broadcast-link"; * struct tipc_bc_base - base structure for keeping broadcast send state * @link: broadcast send link structure * @inputq: data input queue; will only carry SOCK_WAKEUP messages - * @dest: array keeping number of reachable destinations per bearer + * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast * @rcast_support: indicates if all peer nodes support replicast * @rc_ratio: dest count as percentage of cluster size where send method changes - * @bc_threshold: calculated drom rc_ratio; if dests > threshold use broadcast + * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ struct tipc_bc_base { struct tipc_link *link; From a484ef3442d2f05fa59edf4f6d14a8169d1b94a6 Mon Sep 17 00:00:00 2001 From: Zhenbo Gao Date: Mon, 3 Sep 2018 16:36:46 +0800 Subject: [PATCH 238/242] tipc: correct spelling errors for tipc_topsrv_queue_evt() comments tipc_conn_queue_evt -> tipc_topsrv_queue_evt tipc_send_work -> tipc_conn_send_work tipc_send_to_sock -> tipc_conn_send_to_sock Signed-off-by: Zhenbo Gao Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/topsrv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index c8e34ef22c30..2627b5d812e9 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -313,8 +313,8 @@ static void tipc_conn_send_work(struct work_struct *work) conn_put(con); } -/* tipc_conn_queue_evt() - interrupt level call from a subscription instance - * The queued work is launched into tipc_send_work()->tipc_send_to_sock() +/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance + * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock() */ void tipc_topsrv_queue_evt(struct net *net, int conid, u32 event, struct tipc_event *evt) From 639505d4397b8c654a8e2616f9cb70ece40c83f9 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 3 Sep 2018 18:06:24 +0300 Subject: [PATCH 239/242] net/mlx5: Fix SQ offset in QPs with small RQ Correct the formula for calculating the RQ page remainder, which should be in byte granularity. The result will be non-zero only for RQs smaller than PAGE_SIZE, as an RQ size is a power of 2. Divide this by the SQ stride (MLX5_SEND_WQE_BB) to get the SQ offset in strides granularity. Fixes: d7037ad73daa ("net/mlx5: Fix QP fragmented buffer allocation") Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 86478a6b99c5..c8c315eb5128 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -139,14 +139,15 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, struct mlx5_wq_ctrl *wq_ctrl) { u32 sq_strides_offset; + u32 rq_pg_remainder; int err; mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4, MLX5_GET(qpc, qpc, log_rq_size), &wq->rq.fbc); - sq_strides_offset = - ((wq->rq.fbc.frag_sz_m1 + 1) % PAGE_SIZE) / MLX5_SEND_WQE_BB; + rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE; + sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB; mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB), MLX5_GET(qpc, qpc, log_sq_size), From 6d784f1625ea68783cc1fb17de8f6cd3e1660c3f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 3 Sep 2018 11:08:15 -0700 Subject: [PATCH 240/242] act_ife: fix a potential use-after-free Immediately after module_put(), user could delete this module, so e->ops could be already freed before we call e->ops->release(). Fix this by moving module_put() after ops->release(). Fixes: ef6980b6becb ("introduce IFE action") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 196430aefe87..fc412769a1be 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -400,7 +400,6 @@ static void _tcf_ife_cleanup(struct tc_action *a) struct tcf_meta_info *e, *n; list_for_each_entry_safe(e, n, &ife->metalist, metalist) { - module_put(e->ops->owner); list_del(&e->metalist); if (e->metaval) { if (e->ops->release) @@ -408,6 +407,7 @@ static void _tcf_ife_cleanup(struct tc_action *a) else kfree(e->metaval); } + module_put(e->ops->owner); kfree(e); } } From 84cb8eb26cb9ce3c79928094962a475a9d850a53 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 4 Sep 2018 00:44:42 +0300 Subject: [PATCH 241/242] net: sched: action_ife: take reference to meta module Recent refactoring of add_metainfo() caused use_all_metadata() to add metainfo to ife action metalist without taking reference to module. This causes warning in module_put called from ife action cleanup function. Implement add_metainfo_and_get_ops() function that returns with reference to module taken if metainfo was added successfully, and call it from use_all_metadata(), instead of calling __add_metainfo() directly. Example warning: [ 646.344393] WARNING: CPU: 1 PID: 2278 at kernel/module.c:1139 module_put+0x1cb/0x230 [ 646.352437] Modules linked in: act_meta_skbtcindex act_meta_mark act_meta_skbprio act_ife ife veth nfsv3 nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c tun ebtable_filter ebtables ip6table_filter ip6_tables bridge stp llc mlx5_ib ib_uverbs ib_core intel_rapl sb_edac x86_pkg_temp_thermal mlx5_core coretemp kvm_intel kvm nfsd igb irqbypass crct10dif_pclmul devlink crc32_pclmul mei_me joydev ses crc32c_intel enclosure auth_rpcgss i2c_algo_bit ioatdma ptp mei pps_core ghash_clmulni_intel iTCO_wdt iTCO_vendor_support pcspkr dca ipmi_ssif lpc_ich target_core_mod i2c_i801 ipmi_si ipmi_devintf pcc_cpufreq wmi ipmi_msghandler nfs_acl lockd acpi_pad acpi_power_meter grace sunrpc mpt3sas raid_class scsi_transport_sas [ 646.425631] CPU: 1 PID: 2278 Comm: tc Not tainted 4.19.0-rc1+ #799 [ 646.432187] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 646.440595] RIP: 0010:module_put+0x1cb/0x230 [ 646.445238] Code: f3 66 94 02 e8 26 ff fa ff 85 c0 74 11 0f b6 1d 51 30 94 02 80 fb 01 77 60 83 e3 01 74 13 65 ff 0d 3a 83 db 73 e9 2b ff ff ff <0f> 0b e9 00 ff ff ff e8 59 01 fb ff 85 c0 75 e4 48 c7 c2 20 62 6b [ 646.464997] RSP: 0018:ffff880354d37068 EFLAGS: 00010286 [ 646.470599] RAX: 0000000000000000 RBX: ffffffffc0a52518 RCX: ffffffff8c2668db [ 646.478118] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffc0a52518 [ 646.485641] RBP: ffffffffc0a52180 R08: fffffbfff814a4a4 R09: fffffbfff814a4a3 [ 646.493164] R10: ffffffffc0a5251b R11: fffffbfff814a4a4 R12: 1ffff1006a9a6e0d [ 646.500687] R13: 00000000ffffffff R14: ffff880362bab890 R15: dead000000000100 [ 646.508213] FS: 00007f4164c99800(0000) GS:ffff88036fe40000(0000) knlGS:0000000000000000 [ 646.516961] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 646.523080] CR2: 00007f41638b8420 CR3: 0000000351df0004 CR4: 00000000001606e0 [ 646.530595] Call Trace: [ 646.533408] ? find_symbol_in_section+0x260/0x260 [ 646.538509] tcf_ife_cleanup+0x11b/0x200 [act_ife] [ 646.543695] tcf_action_cleanup+0x29/0xa0 [ 646.548078] __tcf_action_put+0x5a/0xb0 [ 646.552289] ? nla_put+0x65/0xe0 [ 646.555889] __tcf_idr_release+0x48/0x60 [ 646.560187] tcf_generic_walker+0x448/0x6b0 [ 646.564764] ? tcf_action_dump_1+0x450/0x450 [ 646.569411] ? __lock_is_held+0x84/0x110 [ 646.573720] ? tcf_ife_walker+0x10c/0x20f [act_ife] [ 646.578982] tca_action_gd+0x972/0xc40 [ 646.583129] ? tca_get_fill.constprop.17+0x250/0x250 [ 646.588471] ? mark_lock+0xcf/0x980 [ 646.592324] ? check_chain_key+0x140/0x1f0 [ 646.596832] ? debug_show_all_locks+0x240/0x240 [ 646.601839] ? memset+0x1f/0x40 [ 646.605350] ? nla_parse+0xca/0x1a0 [ 646.609217] tc_ctl_action+0x215/0x230 [ 646.613339] ? tcf_action_add+0x220/0x220 [ 646.617748] rtnetlink_rcv_msg+0x56a/0x6d0 [ 646.622227] ? rtnl_fdb_del+0x3f0/0x3f0 [ 646.626466] netlink_rcv_skb+0x18d/0x200 [ 646.630752] ? rtnl_fdb_del+0x3f0/0x3f0 [ 646.634959] ? netlink_ack+0x500/0x500 [ 646.639106] netlink_unicast+0x2d0/0x370 [ 646.643409] ? netlink_attachskb+0x340/0x340 [ 646.648050] ? _copy_from_iter_full+0xe9/0x3e0 [ 646.652870] ? import_iovec+0x11e/0x1c0 [ 646.657083] netlink_sendmsg+0x3b9/0x6a0 [ 646.661388] ? netlink_unicast+0x370/0x370 [ 646.665877] ? netlink_unicast+0x370/0x370 [ 646.670351] sock_sendmsg+0x6b/0x80 [ 646.674212] ___sys_sendmsg+0x4a1/0x520 [ 646.678443] ? copy_msghdr_from_user+0x210/0x210 [ 646.683463] ? lock_downgrade+0x320/0x320 [ 646.687849] ? debug_show_all_locks+0x240/0x240 [ 646.692760] ? do_raw_spin_unlock+0xa2/0x130 [ 646.697418] ? _raw_spin_unlock+0x24/0x30 [ 646.701798] ? __handle_mm_fault+0x1819/0x1c10 [ 646.706619] ? __pmd_alloc+0x320/0x320 [ 646.710738] ? debug_show_all_locks+0x240/0x240 [ 646.715649] ? restore_nameidata+0x7b/0xa0 [ 646.720117] ? check_chain_key+0x140/0x1f0 [ 646.724590] ? check_chain_key+0x140/0x1f0 [ 646.729070] ? __fget_light+0xbc/0xd0 [ 646.733121] ? __sys_sendmsg+0xd7/0x150 [ 646.737329] __sys_sendmsg+0xd7/0x150 [ 646.741359] ? __ia32_sys_shutdown+0x30/0x30 [ 646.746003] ? up_read+0x53/0x90 [ 646.749601] ? __do_page_fault+0x484/0x780 [ 646.754105] ? do_syscall_64+0x1e/0x2c0 [ 646.758320] do_syscall_64+0x72/0x2c0 [ 646.762353] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 646.767776] RIP: 0033:0x7f4163872150 [ 646.771713] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24 [ 646.791474] RSP: 002b:00007ffdef7d6b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 646.799721] RAX: ffffffffffffffda RBX: 0000000000000024 RCX: 00007f4163872150 [ 646.807240] RDX: 0000000000000000 RSI: 00007ffdef7d6bd0 RDI: 0000000000000003 [ 646.814760] RBP: 000000005b8b9482 R08: 0000000000000001 R09: 0000000000000000 [ 646.822286] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdef7dad20 [ 646.829807] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000679bc0 [ 646.837360] irq event stamp: 6083 [ 646.841043] hardirqs last enabled at (6081): [] __call_rcu+0x17d/0x500 [ 646.849882] hardirqs last disabled at (6083): [] trace_hardirqs_off_thunk+0x1a/0x1c [ 646.859775] softirqs last enabled at (5968): [] __do_softirq+0x4a1/0x6ee [ 646.868784] softirqs last disabled at (6082): [] tcf_ife_cleanup+0x39/0x200 [act_ife] [ 646.878845] ---[ end trace b1b8c12ffe51e657 ]--- Fixes: 5ffe57da29b3 ("act_ife: fix a potential deadlock") Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index fc412769a1be..06a3d4801878 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -326,6 +326,20 @@ static int __add_metainfo(const struct tcf_meta_ops *ops, return ret; } +static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops, + struct tcf_ife_info *ife, u32 metaid, + bool exists) +{ + int ret; + + if (!try_module_get(ops->owner)) + return -ENOENT; + ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists); + if (ret) + module_put(ops->owner); + return ret; +} + static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, int len, bool exists) { @@ -349,7 +363,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists) read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); + rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists); if (rc == 0) installed += 1; } From a33710bdb6b284f8f1e24f1119d167037b374ebb Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 4 Sep 2018 04:23:56 +0200 Subject: [PATCH 242/242] net: phy: sfp: Handle unimplemented hwmon limits and alarms Not all SFPs implement the registers containing sensor limits and alarms. Luckily, there is a bit indicating if they are implemented or not. Add checking for this bit, when deciding if the hwmon attributes should be visible. Fixes: 1323061a018a ("net: phy: sfp: Add HWMON support for module sensors") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4637d980310e..52fffb98fde9 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -398,7 +398,6 @@ static umode_t sfp_hwmon_is_visible(const void *data, switch (type) { case hwmon_temp: switch (attr) { - case hwmon_temp_input: case hwmon_temp_min_alarm: case hwmon_temp_max_alarm: case hwmon_temp_lcrit_alarm: @@ -407,13 +406,16 @@ static umode_t sfp_hwmon_is_visible(const void *data, case hwmon_temp_max: case hwmon_temp_lcrit: case hwmon_temp_crit: + if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN)) + return 0; + /* fall through */ + case hwmon_temp_input: return 0444; default: return 0; } case hwmon_in: switch (attr) { - case hwmon_in_input: case hwmon_in_min_alarm: case hwmon_in_max_alarm: case hwmon_in_lcrit_alarm: @@ -422,13 +424,16 @@ static umode_t sfp_hwmon_is_visible(const void *data, case hwmon_in_max: case hwmon_in_lcrit: case hwmon_in_crit: + if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN)) + return 0; + /* fall through */ + case hwmon_in_input: return 0444; default: return 0; } case hwmon_curr: switch (attr) { - case hwmon_curr_input: case hwmon_curr_min_alarm: case hwmon_curr_max_alarm: case hwmon_curr_lcrit_alarm: @@ -437,6 +442,10 @@ static umode_t sfp_hwmon_is_visible(const void *data, case hwmon_curr_max: case hwmon_curr_lcrit: case hwmon_curr_crit: + if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN)) + return 0; + /* fall through */ + case hwmon_curr_input: return 0444; default: return 0; @@ -452,7 +461,6 @@ static umode_t sfp_hwmon_is_visible(const void *data, channel == 1) return 0; switch (attr) { - case hwmon_power_input: case hwmon_power_min_alarm: case hwmon_power_max_alarm: case hwmon_power_lcrit_alarm: @@ -461,6 +469,10 @@ static umode_t sfp_hwmon_is_visible(const void *data, case hwmon_power_max: case hwmon_power_lcrit: case hwmon_power_crit: + if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN)) + return 0; + /* fall through */ + case hwmon_power_input: return 0444; default: return 0;