From 924726888f660b2a86382a5dd051ec9ca1b18190 Mon Sep 17 00:00:00 2001 From: "Leonidas P. Papadakos" Date: Fri, 1 Mar 2019 00:29:23 +0200 Subject: [PATCH 001/116] arm64: dts: rockchip: fix rk3328-roc-cc gmac2io tx/rx_delay The rk3328-roc-cc board exhibits tx stability issues with large packets, as does the rock64 board, which was fixed with this patch https://patchwork.kernel.org/patch/10178969/ A similar patch was merged for the rk3328-roc-cc here https://patchwork.kernel.org/patch/10804863/ but it doesn't include the tx/rx_delay tweaks, and I find that they help with an issue where large transfers would bring the ethernet link down, causing a link reset regularly. Signed-off-by: Leonidas P. Papadakos Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index 33c44e857247..0e34354b2092 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -108,8 +108,8 @@ snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x25>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; From eb523a4960b6d61bfda1229907ea58841f0340ae Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 18 Feb 2019 15:59:26 -0300 Subject: [PATCH 002/116] arm64: dts: rockchip: add DDC bus on Rock Pi 4 A DDC I2C bus specifier is required for DDC EDID probing to work properly. Fixes: 1b5715c602fda ("arm64: dts: rockchip: add ROCK Pi 4 DTS support") Signed-off-by: Ezequiel Garcia Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts index 4a543f2117d4..844eac939a97 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts @@ -158,6 +158,7 @@ }; &hdmi { + ddc-i2c-bus = <&i2c3>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_cec>; status = "okay"; From 6b2fde3dbfab6ebc45b0cd605e17ca5057ff9a3b Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 24 Feb 2019 21:51:22 +0000 Subject: [PATCH 003/116] ARM: dts: rockchip: fix rk3288 cpu opp node reference The following error can be seen during boot: of: /cpus/cpu@501: Couldn't find opp node Change cpu nodes to use operating-points-v2 in order to fix this. Fixes: ce76de984649 ("ARM: dts: rockchip: convert rk3288 to operating-points-v2") Cc: stable@vger.kernel.org Signed-off-by: Jonas Karlman Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index ca7d52daa8fb..09868dcee34b 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -70,7 +70,7 @@ compatible = "arm,cortex-a12"; reg = <0x501>; resets = <&cru SRST_CORE1>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; @@ -80,7 +80,7 @@ compatible = "arm,cortex-a12"; reg = <0x502>; resets = <&cru SRST_CORE2>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; @@ -90,7 +90,7 @@ compatible = "arm,cortex-a12"; reg = <0x503>; resets = <&cru SRST_CORE3>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; From a8772e5d826d0f61f8aa9c284b3ab49035d5273d Mon Sep 17 00:00:00 2001 From: Tomohiro Mayama Date: Sun, 10 Mar 2019 01:10:12 +0900 Subject: [PATCH 004/116] arm64: dts: rockchip: Fix vcc_host1_5v GPIO polarity on rk3328-rock64 This patch makes USB ports functioning again. Fixes: 955bebde057e ("arm64: dts: rockchip: add rk3328-rock64 board") Cc: stable@vger.kernel.org Suggested-by: Robin Murphy Signed-off-by: Tomohiro Mayama Tested-by: Katsuhiro Suzuki Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 2157a528276b..79b4d1d4b5d6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -46,8 +46,7 @@ vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator { compatible = "regulator-fixed"; - enable-active-high; - gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>; + gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&usb20_host_drv>; regulator-name = "vcc_host1_5v"; From 8dbc4d5ddb59f49cb3e85bccf42a4720b27a6576 Mon Sep 17 00:00:00 2001 From: David Summers Date: Sat, 9 Mar 2019 15:39:21 +0000 Subject: [PATCH 005/116] ARM: dts: rockchip: Fix SD card detection on rk3288-tinker The Problem: On ASUS Tinker Board S, when booting from the eMMC, and there is card in the sd slot, there are constant errors. Also when warm reboot, uboot can not access the sd slot Cause: Identified by Robin Murphy @ ARM. The Card Detect on rk3288 devices is pulled up by vccio-sd; so when the regulator powers this off, card detect gives spurious errors. A second problem, is during power down, vccio-sd apprears to be powered down. This causes a problem when warm rebooting from the sd card. This was identified by Jonas Karlman. History: A common fault on these rk3288 board, which impliment the reference design. When this arose before: http://lists.infradead.org/pipermail/linux-arm-kernel/2014-August/281153.html And Ulf and Jaehoon clearly said this was a broken card detect design, which should be solved via polling Solution: Hence broken-cd is set as a property. This cures the errors. The powering down of vccio-sd during reboot is cured by adding regulator-boot-on. This solutions has been fairly widely reviewed and tested. Fixes: e58c5e739d6f ("ARM: dts: rockchip: move shared tinker-board nodes to a common dtsi") Cc: stable@vger.kernel.org [Heiko: slightly inaccurate fixes but tinker is a sbc (aka like a Pi) where we can hopefully expect people not to rely on overly old stable kernels] Signed-off-by: David Summers Reviewed-by: Jonas Karlman Tested-by: Jonas Karlman Reviewed-by: Robin Murphy Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-tinker.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi index aa107ee41b8b..ef653c3209bc 100644 --- a/arch/arm/boot/dts/rk3288-tinker.dtsi +++ b/arch/arm/boot/dts/rk3288-tinker.dtsi @@ -254,6 +254,7 @@ }; vccio_sd: LDO_REG5 { + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-name = "vccio_sd"; @@ -430,7 +431,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; - card-detect-delay = <200>; + broken-cd; disable-wp; /* wp not hooked up */ pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; From 6fd8b9780ec1a49ac46e0aaf8775247205e66231 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Wed, 13 Mar 2019 18:45:36 +0000 Subject: [PATCH 006/116] arm64: dts: rockchip: fix rk3328 rgmii high tx error rate Several rk3328 based boards experience high rgmii tx error rates. This is due to several pins in the rk3328.dtsi rgmii pinmux that are missing a defined pull strength setting. This causes the pinmux driver to default to 2ma (bit mask 00). These pins are only defined in the rk3328.dtsi, and are not listed in the rk3328 specification. The TRM only lists them as "Reserved" (RK3328 TRM V1.1, 3.3.3 Detail Register Description, GRF_GPIO0B_IOMUX, GRF_GPIO0C_IOMUX, GRF_GPIO0D_IOMUX). However, removal of these pins from the rgmii pinmux definition causes the interface to fail to transmit. Also, the rgmii tx and rx pins defined in the dtsi are not consistent with the rk3328 specification, with tx pins currently set to 12ma and rx pins set to 2ma. Fix this by setting tx pins to 8ma and the rx pins to 4ma, consistent with the specification. Defining the drive strength for the undefined pins eliminated the high tx packet error rate observed under heavy data transfers. Aligning the drive strength to the TRM values eliminated the occasional packet retry errors under iperf3 testing. This allows much higher data rates with no recorded tx errors. Tested on the rk3328-roc-cc board. Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Peter Geis Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 44 ++++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 84f14b132e8f..c55a3f1a87ff 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1642,50 +1642,50 @@ rgmiim1_pins: rgmiim1-pins { rockchip,pins = /* mac_txclk */ - <1 RK_PB4 2 &pcfg_pull_none_12ma>, + <1 RK_PB4 2 &pcfg_pull_none_8ma>, /* mac_rxclk */ - <1 RK_PB5 2 &pcfg_pull_none_2ma>, + <1 RK_PB5 2 &pcfg_pull_none_4ma>, /* mac_mdio */ - <1 RK_PC3 2 &pcfg_pull_none_2ma>, + <1 RK_PC3 2 &pcfg_pull_none_4ma>, /* mac_txen */ - <1 RK_PD1 2 &pcfg_pull_none_12ma>, + <1 RK_PD1 2 &pcfg_pull_none_8ma>, /* mac_clk */ - <1 RK_PC5 2 &pcfg_pull_none_2ma>, + <1 RK_PC5 2 &pcfg_pull_none_4ma>, /* mac_rxdv */ - <1 RK_PC6 2 &pcfg_pull_none_2ma>, + <1 RK_PC6 2 &pcfg_pull_none_4ma>, /* mac_mdc */ - <1 RK_PC7 2 &pcfg_pull_none_2ma>, + <1 RK_PC7 2 &pcfg_pull_none_4ma>, /* mac_rxd1 */ - <1 RK_PB2 2 &pcfg_pull_none_2ma>, + <1 RK_PB2 2 &pcfg_pull_none_4ma>, /* mac_rxd0 */ - <1 RK_PB3 2 &pcfg_pull_none_2ma>, + <1 RK_PB3 2 &pcfg_pull_none_4ma>, /* mac_txd1 */ - <1 RK_PB0 2 &pcfg_pull_none_12ma>, + <1 RK_PB0 2 &pcfg_pull_none_8ma>, /* mac_txd0 */ - <1 RK_PB1 2 &pcfg_pull_none_12ma>, + <1 RK_PB1 2 &pcfg_pull_none_8ma>, /* mac_rxd3 */ - <1 RK_PB6 2 &pcfg_pull_none_2ma>, + <1 RK_PB6 2 &pcfg_pull_none_4ma>, /* mac_rxd2 */ - <1 RK_PB7 2 &pcfg_pull_none_2ma>, + <1 RK_PB7 2 &pcfg_pull_none_4ma>, /* mac_txd3 */ - <1 RK_PC0 2 &pcfg_pull_none_12ma>, + <1 RK_PC0 2 &pcfg_pull_none_8ma>, /* mac_txd2 */ - <1 RK_PC1 2 &pcfg_pull_none_12ma>, + <1 RK_PC1 2 &pcfg_pull_none_8ma>, /* mac_txclk */ - <0 RK_PB0 1 &pcfg_pull_none>, + <0 RK_PB0 1 &pcfg_pull_none_8ma>, /* mac_txen */ - <0 RK_PB4 1 &pcfg_pull_none>, + <0 RK_PB4 1 &pcfg_pull_none_8ma>, /* mac_clk */ - <0 RK_PD0 1 &pcfg_pull_none>, + <0 RK_PD0 1 &pcfg_pull_none_4ma>, /* mac_txd1 */ - <0 RK_PC0 1 &pcfg_pull_none>, + <0 RK_PC0 1 &pcfg_pull_none_8ma>, /* mac_txd0 */ - <0 RK_PC1 1 &pcfg_pull_none>, + <0 RK_PC1 1 &pcfg_pull_none_8ma>, /* mac_txd3 */ - <0 RK_PC7 1 &pcfg_pull_none>, + <0 RK_PC7 1 &pcfg_pull_none_8ma>, /* mac_txd2 */ - <0 RK_PC6 1 &pcfg_pull_none>; + <0 RK_PC6 1 &pcfg_pull_none_8ma>; }; rmiim1_pins: rmiim1-pins { From 09f91381fa5de1d44bc323d8bf345f5d57b3d9b5 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Wed, 13 Mar 2019 19:02:30 +0000 Subject: [PATCH 007/116] arm64: dts: rockchip: fix rk3328 sdmmc0 write errors Various rk3328 based boards experience occasional sdmmc0 write errors. This is due to the rk3328.dtsi tx drive levels being set to 4ma, vs 8ma per the rk3328 datasheet default settings. Fix this by setting the tx signal pins to 8ma. Inspiration from tonymac32's patch, https://github.com/ayufan-rock64/linux-kernel/commit/dc1212b347e0da17c5460bcc0a56b07d02bac3f8 Fixes issues on the rk3328-roc-cc and the rk3328-rock64 (as per the above commit message). Tested on the rk3328-roc-cc board. Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Peter Geis Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index c55a3f1a87ff..dabef1a21649 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1445,11 +1445,11 @@ sdmmc0 { sdmmc0_clk: sdmmc0-clk { - rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>; + rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>; }; sdmmc0_cmd: sdmmc0-cmd { - rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>; }; sdmmc0_dectn: sdmmc0-dectn { @@ -1461,14 +1461,14 @@ }; sdmmc0_bus1: sdmmc0-bus1 { - rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>; }; sdmmc0_bus4: sdmmc0-bus4 { - rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>, - <1 RK_PA1 1 &pcfg_pull_up_4ma>, - <1 RK_PA2 1 &pcfg_pull_up_4ma>, - <1 RK_PA3 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>, + <1 RK_PA1 1 &pcfg_pull_up_8ma>, + <1 RK_PA2 1 &pcfg_pull_up_8ma>, + <1 RK_PA3 1 &pcfg_pull_up_8ma>; }; sdmmc0_gpio: sdmmc0-gpio { From d6752e185c3168771787a02dc6a55f32260943cc Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 15 Mar 2019 11:51:12 -0700 Subject: [PATCH 008/116] rtc: cros-ec: Fail suspend/resume if wake IRQ can't be configured If we encounter a failure during suspend where this RTC was programmed to wakeup the system from suspend, but that wakeup couldn't be configured because the system didn't support wakeup interrupts, we'll run into the following warning: Unbalanced IRQ 166 wake disable WARNING: CPU: 7 PID: 3071 at kernel/irq/manage.c:669 irq_set_irq_wake+0x108/0x278 This happens because the suspend process isn't aborted when the RTC fails to configure the wakeup IRQ. Instead, we continue suspending the system and then another suspend callback fails the suspend process and "unwinds" the previously suspended drivers by calling their resume callbacks. When we get back to resuming this RTC driver, we'll call disable_irq_wake() on an IRQ that hasn't been configured for wake. Let's just fail suspend/resume here if we can't configure the system to wake and the user has chosen to wakeup with this device. This fixes this warning and makes the code more robust in case there are systems out there that can't wakeup from suspend on this line but the user has chosen to do so. Cc: Enric Balletbo i Serra Cc: Evan Green Cc: Benson Leung Cc: Guenter Roeck Signed-off-by: Stephen Boyd Acked-By: Benson Leung Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cros-ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index e5444296075e..4d6bf9304ceb 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -298,7 +298,7 @@ static int cros_ec_rtc_suspend(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - enable_irq_wake(cros_ec_rtc->cros_ec->irq); + return enable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } @@ -309,7 +309,7 @@ static int cros_ec_rtc_resume(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - disable_irq_wake(cros_ec_rtc->cros_ec->irq); + return disable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } From 15d82d22498784966df8e4696174a16b02cc1052 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Mar 2019 11:32:14 +0100 Subject: [PATCH 009/116] rtc: sh: Fix invalid alarm warning for non-enabled alarm When no alarm has been programmed on RSK-RZA1, an error message is printed during boot: rtc rtc0: invalid alarm value: 2019-03-14T255:255:255 sh_rtc_read_alarm_value() returns 0xff when querying a hardware alarm field that is not enabled. __rtc_read_alarm() validates the received alarm values, and fills in missing fields when needed. While 0xff is handled fine for the year, month, and day fields, and corrected as considered being out-of-range, this is not the case for the hour, minute, and second fields, where -1 is expected for missing fields. Fix this by returning -1 instead, as this value is handled fine for all fields. Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index d417b203cbc5..1d3de2a3d1a4 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -374,7 +374,7 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm) static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off) { unsigned int byte; - int value = 0xff; /* return 0xff for ignored values */ + int value = -1; /* return -1 for ignored values */ byte = readb(rtc->regbase + reg_off); if (byte & AR_ENB) { From 22e7d5148d9a0b2e5bd20913f55cce89b58cd990 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 20 Mar 2019 13:10:23 +0100 Subject: [PATCH 010/116] rtc: sd3078: fix manufacturer name The proper manufacturer name is Shenzhen whwave. Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a71734c41693..f933c06bff4f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -667,9 +667,9 @@ config RTC_DRV_S5M will be called rtc-s5m. config RTC_DRV_SD3078 - tristate "ZXW Crystal SD3078" + tristate "ZXW Shenzhen whwave SD3078" help - If you say yes here you get support for the ZXW Crystal + If you say yes here you get support for the ZXW Shenzhen whwave SD3078 RTC chips. This driver can also be built as a module. If so, the module From 8efd6365417a044db03009724ecc1a9521524913 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 13 Mar 2019 17:28:37 -0500 Subject: [PATCH 011/116] arm64: dts: stratix10: add the sysmgr-syscon property from the gmac's The gmac ethernet driver uses the "altr,sysmgr-syscon" property to configure phy settings for the gmac controller. Add the "altr,sysmgr-syscon" property to all gmac nodes. This patch fixes: [ 0.917530] socfpga-dwmac ff800000.ethernet: No sysmgr-syscon node found [ 0.924209] socfpga-dwmac ff800000.ethernet: Unable to parse OF data Cc: stable@vger.kernel.org Reported-by: Ley Foon Tan Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c649f6b14cb..cd7c76e58b09 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -162,6 +162,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 1>; + altr,sysmgr-syscon = <&sysmgr 0x44 0>; status = "disabled"; }; @@ -179,6 +180,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 2>; + altr,sysmgr-syscon = <&sysmgr 0x48 0>; status = "disabled"; }; @@ -196,6 +198,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 3>; + altr,sysmgr-syscon = <&sysmgr 0x4c 0>; status = "disabled"; }; From 3e2cf62efec52fb49daed437cc486c3cb9a0afa2 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 19 Mar 2019 21:19:52 +0100 Subject: [PATCH 012/116] ARM: OMAP1: ams-delta: Fix broken GPIO ID allocation In order to request dynamic allocationn of GPIO IDs, a negative number should be passed as a base GPIO ID via platform data. Unfortuntely, commit 771e53c4d1a1 ("ARM: OMAP1: ams-delta: Drop board specific global GPIO numbers") didn't follow that rule while switching to dynamically allocated GPIO IDs for Amstrad Delta latches, making their IDs overlapping with those already assigned to OMAP GPIO devices. Fix it. Fixes: 771e53c4d1a1 ("ARM: OMAP1: ams-delta: Drop board specific global GPIO numbers") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org Acked-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index be30c3c061b4..1b15d593837e 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -182,6 +182,7 @@ static struct resource latch1_resources[] = { static struct bgpio_pdata latch1_pdata = { .label = LATCH1_LABEL, + .base = -1, .ngpio = LATCH1_NGPIO, }; @@ -219,6 +220,7 @@ static struct resource latch2_resources[] = { static struct bgpio_pdata latch2_pdata = { .label = LATCH2_LABEL, + .base = -1, .ngpio = LATCH2_NGPIO, }; From 30645307e5d2c8a4caf978558c66121ac91ad17e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Feb 2019 14:20:42 +0100 Subject: [PATCH 013/116] ARM: OMAP2+: add missing of_node_put after of_device_is_available Add an of_node_put when a tested device node is not available. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ identifier f; local idexpression e; expression x; @@ e = f(...); ... when != of_node_put(e) when != x = e when != e = x when any if (<+...of_device_is_available(e)...+>) { ... when != of_node_put(e) ( return e; | + of_node_put(e); return ...; ) } // Fixes: e0c827aca0730 ("drm/omap: Populate DSS children in omapdss driver") Signed-off-by: Julia Lawall Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 1444b4b4bd9f..439e143cad7b 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -250,8 +250,10 @@ static int __init omapdss_init_of(void) if (!node) return 0; - if (!of_device_is_available(node)) + if (!of_device_is_available(node)) { + of_node_put(node); return 0; + } pdev = of_find_device_by_node(node); From 4f96dc0a3e79ec257a2b082dab3ee694ff88c317 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Mar 2019 12:59:09 +0200 Subject: [PATCH 014/116] ARM: dts: am335x-evm: Correct the regulators for the audio codec Correctly map the regulators used by tlv320aic3106. Both 1.8V and 3.3V for the codec is derived from VBAT via fixed regulators. Cc: # v4.14+ Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evm.dts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index dce5be5df97b..edcff79879e7 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -57,6 +57,24 @@ enable-active-high; }; + /* TPS79501 */ + v1_8d_reg: fixedregulator-v1_8d { + compatible = "regulator-fixed"; + regulator-name = "v1_8d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + /* TPS79501 */ + v3_3d_reg: fixedregulator-v3_3d { + compatible = "regulator-fixed"; + regulator-name = "v3_3d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + matrix_keypad: matrix_keypad0 { compatible = "gpio-matrix-keypad"; debounce-delay-ms = <5>; @@ -499,10 +517,10 @@ status = "okay"; /* Regulators */ - AVDD-supply = <&vaux2_reg>; - IOVDD-supply = <&vaux2_reg>; - DRVDD-supply = <&vaux2_reg>; - DVDD-supply = <&vbat>; + AVDD-supply = <&v3_3d_reg>; + IOVDD-supply = <&v3_3d_reg>; + DRVDD-supply = <&v3_3d_reg>; + DVDD-supply = <&v1_8d_reg>; }; }; From 6691370646e844be98bb6558c024269791d20bd7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Mar 2019 12:59:17 +0200 Subject: [PATCH 015/116] ARM: dts: am335x-evmsk: Correct the regulators for the audio codec Correctly map the regulators used by tlv320aic3106. Both 1.8V and 3.3V for the codec is derived from VBAT via fixed regulators. Cc: # v4.14+ Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evmsk.dts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index b128998097ce..2c2d8b5b8cf5 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -73,6 +73,24 @@ enable-active-high; }; + /* TPS79518 */ + v1_8d_reg: fixedregulator-v1_8d { + compatible = "regulator-fixed"; + regulator-name = "v1_8d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + /* TPS78633 */ + v3_3d_reg: fixedregulator-v3_3d { + compatible = "regulator-fixed"; + regulator-name = "v3_3d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + leds { pinctrl-names = "default"; pinctrl-0 = <&user_leds_s0>; @@ -501,10 +519,10 @@ status = "okay"; /* Regulators */ - AVDD-supply = <&vaux2_reg>; - IOVDD-supply = <&vaux2_reg>; - DRVDD-supply = <&vaux2_reg>; - DVDD-supply = <&vbat>; + AVDD-supply = <&v3_3d_reg>; + IOVDD-supply = <&v3_3d_reg>; + DRVDD-supply = <&v3_3d_reg>; + DVDD-supply = <&v1_8d_reg>; }; }; From d040e4e8deeaa8257d6aa260e29ad69832b5d630 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:00 -0700 Subject: [PATCH 016/116] ARM: dts: rockchip: Fix gpu opp node names for rk3288 The device tree compiler yells like this: Warning (unit_address_vs_reg): /gpu-opp-table/opp@100000000: node has a unit name, but no reg property Let's match the cpu opp node names and use a dash. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 09868dcee34b..df0c5456c94f 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1282,27 +1282,27 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <950000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <950000>; }; - opp@300000000 { + opp-300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <1000000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <1100000>; }; - opp@500000000 { + opp-500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <1200000>; }; - opp@600000000 { + opp-600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <1250000>; }; From 282e2e078ba5338c72150477b743794bc7523917 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:01 -0700 Subject: [PATCH 017/116] ARM: dts: rockchip: Remove #address/#size-cells from rk3288 mipi_dsi They are pointless. As dtc points out: Warning (avoid_unnecessary_addr_size): /mipi@ff960000: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Let's remove them. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index df0c5456c94f..a024d1e7e74c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1119,8 +1119,6 @@ clock-names = "ref", "pclk"; power-domains = <&power RK3288_PD_VIO>; rockchip,grf = <&grf>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { From 1a96665143c355b1019ed13b927266185d2a1e4f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:02 -0700 Subject: [PATCH 018/116] ARM: dts: rockchip: Remove #address/#size-cells from rk3288-veyron gpio-keys They are pointless. As dtc points out: Warning (avoid_unnecessary_addr_size): /gpio-keys: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Let's remove them. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-veyron.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 0bc2409f6903..192dbc089ade 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -25,8 +25,6 @@ gpio_keys: gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; pinctrl-names = "default"; pinctrl-0 = <&pwr_key_l>; From a6256b3a92cbaf3f5ff034ce09d5665607e2d7a4 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 4 Mar 2019 11:49:16 +0100 Subject: [PATCH 019/116] dt-bindings: reset: meson-g12a: Add missing USB2 PHY resets The G12A Documentation lacked these 2 reset lines, but they are present and used for each USB 2 PHYs. Add them to the dt-bindings for the upcoming USB support. Fixes: dbfc54534dfc ("dt-bindings: reset: meson: add g12a bindings") Signed-off-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/amlogic,meson-g12a-reset.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h index 8063e8314eef..6d487c5eba2c 100644 --- a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h +++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h @@ -51,7 +51,10 @@ #define RESET_SD_EMMC_A 44 #define RESET_SD_EMMC_B 45 #define RESET_SD_EMMC_C 46 -/* 47-60 */ +/* 47 */ +#define RESET_USB_PHY20 48 +#define RESET_USB_PHY21 49 +/* 50-60 */ #define RESET_AUDIO_CODEC 61 /* 62-63 */ /* RESET2 */ From 13e8a05b922457761ddef39cfff6231bd4ed9eef Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Mar 2019 22:03:52 +0800 Subject: [PATCH 020/116] reset: meson-audio-arb: Fix missing .owner setting of reset_controller_dev Set .owner to prevent module unloading while being used. Signed-off-by: Axel Lin Fixes: d903779b58be ("reset: meson: add meson audio arb driver") Signed-off-by: Philipp Zabel --- drivers/reset/reset-meson-audio-arb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/reset-meson-audio-arb.c b/drivers/reset/reset-meson-audio-arb.c index 91751617b37a..c53a2185a039 100644 --- a/drivers/reset/reset-meson-audio-arb.c +++ b/drivers/reset/reset-meson-audio-arb.c @@ -130,6 +130,7 @@ static int meson_audio_arb_probe(struct platform_device *pdev) arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits); arb->rstc.ops = &meson_audio_arb_rstc_ops; arb->rstc.of_node = dev->of_node; + arb->rstc.owner = THIS_MODULE; /* * Enable general : From 7d56bedb2730dc2ea8abf0fd7240ee99ecfee3c9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 10:32:23 -0700 Subject: [PATCH 021/116] ARM: dts: Fix dcan clkctrl clock for am3 We must not use legacy clock defines for dts clckctrl clocks as the offsets will be wrong. Fixes: 87fc89ced3a7 ("ARM: dts: am335x: Move l4 child devices to probe them with ti-sysc") Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx-l4.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index f459ec316a22..ca6d9f02a800 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1762,7 +1762,7 @@ reg = <0xcc000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -1785,7 +1785,7 @@ reg = <0xd0000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; From b4e9e931e9bb2f5b302ce66640832f5a3e57e8c4 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Fri, 22 Mar 2019 14:12:30 +0200 Subject: [PATCH 022/116] crypto: caam - fix copy of next buffer for xcbc and cmac MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a side effect of adding xcbc support, when the next_buffer is not copied. The issue occurs, when there is stored from previous state a blocksize buffer and received, a less than blocksize, from user. In this case, the nents for req->src is 0, and the next_buffer is not copied. An example is: { .tap = { 17, 15, 8 }, .psize = 40, .np = 3, .ksize = 16, } Fixes: 12b8567f6fa4 ("crypto: caam - add support for xcbc(aes)") Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index b1eadc6652b5..7205d9f4029e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -865,19 +865,18 @@ static int ahash_update_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - if (mapped_nents) { + if (mapped_nents) sg_to_sec4_sg_last(req->src, mapped_nents, edesc->sec4_sg + sec4_sg_src_index, 0); - if (*next_buflen) - scatterwalk_map_and_copy(next_buf, req->src, - to_hash - *buflen, - *next_buflen, 0); - } else { + else sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1); - } + if (*next_buflen) + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, From e7dfb6d04e4715be1f3eb2c60d97b753fd2e4516 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Mon, 11 Mar 2019 08:57:42 +0100 Subject: [PATCH 023/116] ARM: dts: at91: Fix typo in ISC_D0 on PC9 The function argument for the ISC_D0 on PC9 was incorrect. According to the documentation it should be 'C' aka 3. Signed-off-by: David Engraf Reviewed-by: Nicolas Ferre Signed-off-by: Ludovic Desroches Fixes: 7f16cb676c00 ("ARM: at91/dt: add sama5d2 pinmux") Cc: # v4.4+ --- arch/arm/boot/dts/sama5d2-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h index 1c01a6f843d8..28a2e45752fe 100644 --- a/arch/arm/boot/dts/sama5d2-pinfunc.h +++ b/arch/arm/boot/dts/sama5d2-pinfunc.h @@ -518,7 +518,7 @@ #define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0) #define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3) #define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1) -#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1) +#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1) #define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2) #define PIN_PC10 74 #define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0) From 872e192fab643887f143106eb56443d87e5e87c1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 Mar 2019 18:11:03 +0000 Subject: [PATCH 024/116] scsi: qedi: remove declaration of nvm_image from stack The nvm_image is a large struct qedi_nvm_iscsi_image object of over 24K so don't declare it on the stack just for a sizeof requirement; use sizeof on struct qedi_nvm_iscsi_image instead. Fixes: c77a2fa3ff8f ("scsi: qedi: Add the CRC size within iSCSI NVM image") Signed-off-by: Colin Ian King Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index e74a62448ba4..e5db9a9954dc 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1392,10 +1392,8 @@ static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi) static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - struct qedi_nvm_iscsi_image nvm_image; - qedi->iscsi_image = dma_alloc_coherent(&qedi->pdev->dev, - sizeof(nvm_image), + sizeof(struct qedi_nvm_iscsi_image), &qedi->nvm_buf_dma, GFP_KERNEL); if (!qedi->iscsi_image) { QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n"); @@ -2236,14 +2234,13 @@ static void qedi_boot_release(void *data) static int qedi_get_boot_info(struct qedi_ctx *qedi) { int ret = 1; - struct qedi_nvm_iscsi_image nvm_image; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Get NVM iSCSI CFG image\n"); ret = qedi_ops->common->nvm_get_image(qedi->cdev, QED_NVM_IMAGE_ISCSI_CFG, (char *)qedi->iscsi_image, - sizeof(nvm_image)); + sizeof(struct qedi_nvm_iscsi_image)); if (ret) QEDI_ERR(&qedi->dbg_ctx, "Could not get NVM image. ret = %d\n", ret); From 42b1bd33dcdef4ffd98f695e188bab82f9fa46d8 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 29 Mar 2019 17:01:18 +0300 Subject: [PATCH 025/116] block/bfq: fix ifdef for CONFIG_BFQ_GROUP_IOSCHED=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace BFQ_GROUP_IOSCHED_ENABLED with CONFIG_BFQ_GROUP_IOSCHED. Code under these ifdefs never worked, something might be broken. Fixes: 0471559c2fbd ("block, bfq: add/remove entity weights correctly") Fixes: 73d58118498b ("block, bfq: consider also ioprio classes in symmetry detection") Reviewed-by: Holger Hoffstätte Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- block/bfq-wf2q.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4c592496a16a..fac188dd78fa 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -674,7 +674,7 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * at least two nodes. */ return !(varied_queue_weights || multiple_classes_busy -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED || bfqd->num_groups_with_pending_reqs > 0 #endif ); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 63311d1ff1ed..a11bef75483d 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1012,7 +1012,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, entity->on_st = true; } -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); From 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 13 Mar 2019 07:56:02 -0400 Subject: [PATCH 026/116] dm integrity: change memcmp to strncmp in dm_integrity_ctr If the string opt_string is small, the function memcmp can access bytes that are beyond the terminating nul character. In theory, it could cause segfault, if opt_string were located just below some unmapped memory. Change from memcmp to strncmp so that we don't read bytes beyond the end of the string. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index d57d997a52c8..33fac437569f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3185,7 +3185,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { + else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { if (ic->meta_dev) { dm_put_device(ti, ic->meta_dev); ic->meta_dev = NULL; @@ -3204,17 +3204,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } ic->sectors_per_block = val >> SECTOR_SHIFT; - } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, "Invalid internal_hash argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { + } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, "Invalid journal_crypt argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { + } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, "Invalid journal_mac argument"); if (r) From 5efedc9b62b5cf0ccc84ed427a07f0d2485091c4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:16:34 +0800 Subject: [PATCH 027/116] dm integrity: make dm_integrity_init and dm_integrity_exit static Fix sparse warnings: drivers/md/dm-integrity.c:3619:12: warning: symbol 'dm_integrity_init' was not declared. Should it be static? drivers/md/dm-integrity.c:3638:6: warning: symbol 'dm_integrity_exit' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 33fac437569f..94b865c5ce71 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3616,7 +3616,7 @@ static struct target_type integrity_target = { .io_hints = dm_integrity_io_hints, }; -int __init dm_integrity_init(void) +static int __init dm_integrity_init(void) { int r; @@ -3635,7 +3635,7 @@ int __init dm_integrity_init(void) return r; } -void dm_integrity_exit(void) +static void __exit dm_integrity_exit(void) { dm_unregister_target(&integrity_target); kmem_cache_destroy(journal_io_cache); From 93fc91675a6c84d6ab355188aea398bda2cc51f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Mar 2019 15:00:09 -0700 Subject: [PATCH 028/116] dm init: fix const confusion for dm_allowed_targets array A non const pointer to const cannot be marked initconst. Mark the array actually const. Fixes: 6bbc923dfcf5 dm: add support to directly boot to a mapped device Signed-off-by: Andi Kleen Signed-off-by: Mike Snitzer --- drivers/md/dm-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b53f30f16b4d..4b76f84424c3 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -36,7 +36,7 @@ struct dm_device { struct list_head list; }; -const char *dm_allowed_targets[] __initconst = { +const char * const dm_allowed_targets[] __initconst = { "crypt", "delay", "linear", From 75ae193626de3238ca5fb895868ec91c94e63b1b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 21 Mar 2019 16:46:12 -0400 Subject: [PATCH 029/116] dm: revert 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") The limit was already incorporated to dm-crypt with commit 4e870e948fba ("dm crypt: fix error with too large bios"), so we don't need to apply it globally to all targets. The quantity BIO_MAX_PAGES * PAGE_SIZE is wrong anyway because the variable ti->max_io_len it is supposed to be in the units of 512-byte sectors not in bytes. Reduction of the limit to 1048576 sectors could even cause data corruption in rare cases - suppose that we have a dm-striped device with stripe size 768MiB. The target will call dm_set_target_max_io_len with the value 1572864. The buggy code would reduce it to 1048576. Now, the dm-core will errorneously split the bios on 1048576-sector boundary insetad of 1572864-sector boundary and pass these stripe-crossing bios to the striped target. Cc: stable@vger.kernel.org # v4.16+ Fixes: 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") Signed-off-by: Mikulas Patocka Acked-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 68d24056d0b1..89b04169658d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1042,15 +1042,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) return -EINVAL; } - /* - * BIO based queue uses its own splitting. When multipage bvecs - * is switched on, size of the incoming bio may be too big to - * be handled in some targets, such as crypt. - * - * When these targets are ready for the big bio, we can remove - * the limit. - */ - ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE); + ti->max_io_len = (uint32_t) len; return 0; } From eb40c0acdc342b815d4d03ae6abb09e80c0f2988 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 26 Mar 2019 20:20:58 +0100 Subject: [PATCH 030/116] dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors Some devices don't use blk_integrity but still want stable pages because they do their own checksumming. Examples include rbd and iSCSI when data digests are negotiated. Stacking DM (and thus LVM) on top of these devices results in sporadic checksum errors. Set BDI_CAP_STABLE_WRITES if any underlying device has it set. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ba9481f1bf3c..cde3b49b2a91 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1844,6 +1844,36 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } +static int device_requires_stable_pages(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && bdi_cap_stable_pages_required(q->backing_dev_info); +} + +/* + * If any underlying device requires stable pages, a table must require + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. + */ +static bool dm_table_requires_stable_pages(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) + return true; + } + + return false; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1896,6 +1926,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_verify_integrity(t); + /* + * Some devices don't use blk_integrity but still want stable pages + * because they do their own checksumming. + */ + if (dm_table_requires_stable_pages(t)) + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + else + q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + /* * Determine whether or not this queue's I/O timings contribute * to the entropy pool, Only request-based targets use this. From b9a1ff504b9492ad6beb7d5606e0e3365d4d8499 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 1 Apr 2019 21:40:36 +0800 Subject: [PATCH 031/116] block: use blk_free_flush_queue() to free hctx->fq in blk_mq_init_hctx kfree() can leak the hctx->fq->flush_rq field. Reviewed-by: Ming Lei Signed-off-by: Shenghui Wang Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ff3d7b49969..f3b0d33bdf88 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2332,7 +2332,7 @@ static int blk_mq_init_hctx(struct request_queue *q, return 0; free_fq: - kfree(hctx->fq); + blk_free_flush_queue(hctx->fq); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); From ff3b74b8e1675c802e09157a56c97ca38a659b9d Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 26 Mar 2019 21:19:25 +0800 Subject: [PATCH 032/116] blk-mq: add trace block plug and unplug for multiple queues For now, we just trace plug for single queue device or drivers provide .commit_rqs, and have not trace plug for multiple queues device. But, unplug events will be recorded when call blk_mq_flush_plug_list(). Then, trace events will be asymmetrical, just have unplug and without plug. This patch add trace plug and unplug for multiple queues device in blk_mq_make_request(). After that, we can accurately trace plug and unplug for multiple queues. Reviewed-by: Christoph Hellwig Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index f3b0d33bdf88..22074a1e37cd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2003,11 +2003,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug->rq_count--; } blk_add_rq_to_plug(plug, rq); + trace_block_plug(q); blk_mq_put_ctx(data.ctx); if (same_queue_rq) { data.hctx = same_queue_rq->mq_hctx; + trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie, false, true); } From 882c5e552ffd06856de42261460f46e18319d259 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 2 Apr 2019 12:26:36 +0200 Subject: [PATCH 033/116] rtc: da9063: set uie_unsupported when relevant The DA9063AD doesn't support alarms on any seconds and its granularity is the minute. Set uie_unsupported in that case. Reported-by: Wolfram Sang Reported-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Acked-by: Steve Twiss Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-da9063.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index b4e054c64bad..69b54e5556c0 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -480,6 +480,13 @@ static int da9063_rtc_probe(struct platform_device *pdev) da9063_data_to_tm(data, &rtc->alarm_time, rtc); rtc->rtc_sync = false; + /* + * TODO: some models have alarms on a minute boundary but still support + * real hardware interrupts. Add this once the core supports it. + */ + if (config->rtc_data_start != RTC_SEC) + rtc->rtc_dev->uie_unsupported = 1; + irq_alarm = platform_get_irq_byname(pdev, "ALARM"); ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL, da9063_alarm_event, From 25adf50fe25d506d3fc12070a5ff4be858a1ac1b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 3 Apr 2019 09:52:40 -0600 Subject: [PATCH 034/116] io_uring: fix double free in case of fileset regitration failure Will Deacon reported the following KASAN complaint: [ 149.890370] ================================================================== [ 149.891266] BUG: KASAN: double-free or invalid-free in io_sqe_files_unregister+0xa8/0x140 [ 149.892218] [ 149.892411] CPU: 113 PID: 3974 Comm: io_uring_regist Tainted: G B 5.1.0-rc3-00012-g40b114779944 #3 [ 149.893623] Hardware name: linux,dummy-virt (DT) [ 149.894169] Call trace: [ 149.894539] dump_backtrace+0x0/0x228 [ 149.895172] show_stack+0x14/0x20 [ 149.895747] dump_stack+0xe8/0x124 [ 149.896335] print_address_description+0x60/0x258 [ 149.897148] kasan_report_invalid_free+0x78/0xb8 [ 149.897936] __kasan_slab_free+0x1fc/0x228 [ 149.898641] kasan_slab_free+0x10/0x18 [ 149.899283] kfree+0x70/0x1f8 [ 149.899798] io_sqe_files_unregister+0xa8/0x140 [ 149.900574] io_ring_ctx_wait_and_kill+0x190/0x3c0 [ 149.901402] io_uring_release+0x2c/0x48 [ 149.902068] __fput+0x18c/0x510 [ 149.902612] ____fput+0xc/0x18 [ 149.903146] task_work_run+0xf0/0x148 [ 149.903778] do_notify_resume+0x554/0x748 [ 149.904467] work_pending+0x8/0x10 [ 149.905060] [ 149.905331] Allocated by task 3974: [ 149.905934] __kasan_kmalloc.isra.0.part.1+0x48/0xf8 [ 149.906786] __kasan_kmalloc.isra.0+0xb8/0xd8 [ 149.907531] kasan_kmalloc+0xc/0x18 [ 149.908134] __kmalloc+0x168/0x248 [ 149.908724] __arm64_sys_io_uring_register+0x2b8/0x15a8 [ 149.909622] el0_svc_common+0x100/0x258 [ 149.910281] el0_svc_handler+0x48/0xc0 [ 149.910928] el0_svc+0x8/0xc [ 149.911425] [ 149.911696] Freed by task 3974: [ 149.912242] __kasan_slab_free+0x114/0x228 [ 149.912955] kasan_slab_free+0x10/0x18 [ 149.913602] kfree+0x70/0x1f8 [ 149.914118] __arm64_sys_io_uring_register+0xc2c/0x15a8 [ 149.915009] el0_svc_common+0x100/0x258 [ 149.915670] el0_svc_handler+0x48/0xc0 [ 149.916317] el0_svc+0x8/0xc [ 149.916817] [ 149.917101] The buggy address belongs to the object at ffff8004ce07ed00 [ 149.917101] which belongs to the cache kmalloc-128 of size 128 [ 149.919197] The buggy address is located 0 bytes inside of [ 149.919197] 128-byte region [ffff8004ce07ed00, ffff8004ce07ed80) [ 149.921142] The buggy address belongs to the page: [ 149.921953] page:ffff7e0013381f00 count:1 mapcount:0 mapping:ffff800503417c00 index:0x0 compound_mapcount: 0 [ 149.923595] flags: 0x1ffff00000010200(slab|head) [ 149.924388] raw: 1ffff00000010200 dead000000000100 dead000000000200 ffff800503417c00 [ 149.925706] raw: 0000000000000000 0000000080400040 00000001ffffffff 0000000000000000 [ 149.927011] page dumped because: kasan: bad access detected [ 149.927956] [ 149.928224] Memory state around the buggy address: [ 149.929054] ffff8004ce07ec00: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 149.930274] ffff8004ce07ec80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 149.931494] >ffff8004ce07ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 149.932712] ^ [ 149.933281] ffff8004ce07ed80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 149.934508] ffff8004ce07ee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 149.935725] ================================================================== which is due to a failure in registrering a fileset. This frees the ctx->user_files pointer, but doesn't clear it. When the io_uring instance is later freed through the normal channels, we free this pointer again. At this point it's invalid. Ensure we clear the pointer when we free it for the error case. Reported-by: Will Deacon Tested-by: Will Deacon Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index bbdbd56cf2ac..07d6ef195d05 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2215,6 +2215,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, fput(ctx->user_files[i]); kfree(ctx->user_files); + ctx->user_files = NULL; ctx->nr_user_files = 0; return ret; } From 58ccd2d31e502c37e108b285bf3d343eb00c235b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 3 Apr 2019 11:37:07 +0800 Subject: [PATCH 035/116] paride/pf: Fix potential NULL pointer dereference Syzkaller report this: pf: pf version 1.04, major 47, cluster 64, nice 0 pf: No ATAPI disk detected kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 9887 Comm: syz-executor.0 Tainted: G C 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pf_init+0x7af/0x1000 [pf] Code: 46 77 d2 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 03 25 a6 d2 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 e6 24 a6 d2 49 8b bc 24 80 05 00 00 e8 79 34 RSP: 0018:ffff8881abcbf998 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc1e4a8a8 RCX: ffffffffaec50788 RDX: 0000000000039b10 RSI: ffffc9000153c000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee44e59 R09: ffffed103ee44e59 R10: 0000000000000001 R11: ffffed103ee44e58 R12: 0000000000000000 R13: ffffffffc1e4b028 R14: 0000000000000000 R15: 0000000000000020 FS: 00007f1b78a91700(0000) GS:ffff8881f7200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6d72b207f8 CR3: 00000001d5790004 CR4: 00000000007606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1e50000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f1b78a90c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007f1b78a90c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1b78a916bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pf(+) paride gpio_tps65218 tps65218 i2c_cht_wc ati_remote dc395x act_meta_skbtcindex act_ife ife ecdh_generic rc_xbox_dvd sky81452_regulator v4l2_fwnode leds_blinkm snd_usb_hiface comedi(C) aes_ti slhc cfi_cmdset_0020 mtd cfi_util sx8654 mdio_gpio of_mdio fixed_phy mdio_bitbang libphy alcor_pci matrix_keymap hid_uclogic usbhid scsi_transport_fc videobuf2_v4l2 videobuf2_dma_sg snd_soc_pcm179x_spi snd_soc_pcm179x_codec i2c_demux_pinctrl mdev snd_indigodj isl6405 mii enc28j60 cmac adt7316_i2c(C) adt7316(C) fmc_trivial fmc nf_reject_ipv4 authenc rc_dtt200u rtc_ds1672 dvb_usb_dibusb_mc dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb dvb_core videobuf2_common videobuf2_vmalloc videobuf2_memops regulator_haptic adf7242 mac802154 ieee802154 s5h1409 da9034_ts snd_intel8x0m wmi cx24120 usbcore sdhci_cadence sdhci_pltfm sdhci mmc_core joydev i2c_algo_bit scsi_transport_iscsi iscsi_boot_sysfs ves1820 lockd grace nfs_acl auth_rpcgss sunrp c ip_vs snd_soc_adau7002 snd_cs4281 snd_rawmidi gameport snd_opl3_lib snd_seq_device snd_hwdep snd_ac97_codec ad7418 hid_primax hid snd_soc_cs4265 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd soundcore ti_adc108s102 eeprom_93cx6 i2c_algo_pca mlxreg_hotplug st_pressure st_sensors industrialio_triggered_buffer kfifo_buf industrialio v4l2_common videodev media snd_soc_adau_utils rc_pinnacle_grey rc_core pps_gpio leds_lm3692x nandcore ledtrig_pattern iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel ide_pci_generic aes_x86_64 piix crypto_simd input_leds psmouse cryp td glue_helper ide_core intel_agp serio_raw intel_gtt agpgart ata_generic i2c_piix4 pata_acpi parport_pc parport rtc_cmos floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: paride] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 7a818cf5f210d79e ]--- If alloc_disk fails in pf_init_units, pf->disk will be NULL, however in pf_detect and pf_exit, it's not check this before free.It may result a NULL pointer dereference. Also when register_blkdev failed, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources. Reported-by: Hulk Robot Fixes: 6ce59025f118 ("paride/pf: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 103b617cdc31..35e6e271b219 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -762,6 +762,8 @@ static int pf_detect(void) printk("%s: No ATAPI disk detected\n", name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; blk_cleanup_queue(pf->disk->queue); pf->disk->queue = NULL; blk_mq_free_tag_set(&pf->tag_set); @@ -1029,8 +1031,13 @@ static int __init pf_init(void) pf_busy = 0; if (register_blkdev(major, name)) { - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; + blk_cleanup_queue(pf->disk->queue); + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); + } return -EBUSY; } @@ -1051,6 +1058,9 @@ static void __exit pf_exit(void) int unit; unregister_blkdev(major, name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; + if (pf->present) del_gendisk(pf->disk); From 426b046b748d1f47e096e05bdcc6fb4172791307 Mon Sep 17 00:00:00 2001 From: Louis Taylor Date: Wed, 3 Apr 2019 12:36:20 -0600 Subject: [PATCH 036/116] vfio/pci: use correct format characters When compiling with -Wformat, clang emits the following warnings: drivers/vfio/pci/vfio_pci.c:1601:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1601:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1605:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1605:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ The types of these arguments are unconditionally defined, so this patch updates the format character to the correct ones for unsigned ints. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Signed-off-by: Louis Taylor Reviewed-by: Nick Desaulniers Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index a25659b5a5d1..3fa20e95a6bb 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1661,11 +1661,11 @@ static void __init vfio_pci_fill_ids(void) rc = pci_add_dynid(&vfio_pci_driver, vendor, device, subvendor, subdevice, class, class_mask, 0); if (rc) - pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n", + pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", vendor, device, subvendor, subdevice, class, class_mask, rc); else - pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n", + pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", vendor, device, subvendor, subdevice, class, class_mask); } From e39dd513d5f2ae2041c593d42fd0d8b24e7e950b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 3 Apr 2019 12:36:21 -0600 Subject: [PATCH 037/116] vfio/spapr_tce: Make symbol 'tce_iommu_driver_ops' static Fixes the following sparse warning: drivers/vfio/vfio_iommu_spapr_tce.c:1401:36: warning: symbol 'tce_iommu_driver_ops' was not declared. Should it be static? Fixes: 5ffd229c0273 ("powerpc/vfio: Implement IOMMU driver for VFIO") Signed-off-by: Wang Hai Reviewed-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 8dbb270998f4..6b64e45a5269 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1398,7 +1398,7 @@ unlock_exit: mutex_unlock(&container->lock); } -const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { +static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { .name = "iommu-vfio-powerpc", .owner = THIS_MODULE, .open = tce_iommu_open, From 492855939bdb59c6f947b0b5b44af9ad82b7e38c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 3 Apr 2019 12:36:21 -0600 Subject: [PATCH 038/116] vfio/type1: Limit DMA mappings per container Memory backed DMA mappings are accounted against a user's locked memory limit, including multiple mappings of the same memory. This accounting bounds the number of such mappings that a user can create. However, DMA mappings that are not backed by memory, such as DMA mappings of device MMIO via mmaps, do not make use of page pinning and therefore do not count against the user's locked memory limit. These mappings still consume memory, but the memory is not well associated to the process for the purpose of oom killing a task. To add bounding on this use case, we introduce a limit to the total number of concurrent DMA mappings that a user is allowed to create. This limit is exposed as a tunable module option where the default value of 64K is expected to be well in excess of any reasonable use case (a large virtual machine configuration would typically only make use of tens of concurrent mappings). This fixes CVE-2019-3882. Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Peter Xu Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 73652e21efec..d0f731c9920a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -58,12 +58,18 @@ module_param_named(disable_hugepages, MODULE_PARM_DESC(disable_hugepages, "Disable VFIO IOMMU support for IOMMU hugepages."); +static unsigned int dma_entry_limit __read_mostly = U16_MAX; +module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644); +MODULE_PARM_DESC(dma_entry_limit, + "Maximum number of user DMA mappings per container (65535)."); + struct vfio_iommu { struct list_head domain_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; struct blocking_notifier_head notifier; + unsigned int dma_avail; bool v2; bool nesting; }; @@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); kfree(dma); + iommu->dma_avail++; } static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) @@ -1081,12 +1088,18 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } + if (!iommu->dma_avail) { + ret = -ENOSPC; + goto out_unlock; + } + dma = kzalloc(sizeof(*dma), GFP_KERNEL); if (!dma) { ret = -ENOMEM; goto out_unlock; } + iommu->dma_avail--; dma->iova = iova; dma->vaddr = vaddr; dma->prot = prot; @@ -1583,6 +1596,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->domain_list); iommu->dma_list = RB_ROOT; + iommu->dma_avail = dma_entry_limit; mutex_init(&iommu->lock); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); From 1cb1d2c64e812928fe0a40b8f7e74523d0283dbe Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 30 Mar 2019 15:43:31 +0100 Subject: [PATCH 039/116] scsi: core: add new RDAC LENOVO/DE_Series device Blacklist "Universal Xport" LUN. It's used for in-band storage array management. Also add model to the rdac dh family. Cc: Martin Wilck Cc: Hannes Reinecke Cc: NetApp RDAC team Cc: Christophe Varoqui Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: DM ML Signed-off-by: Xose Vazquez Perez Reviewed-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + drivers/scsi/scsi_dh.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index c4cbfd07b916..a08ff3bd6310 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -238,6 +238,7 @@ static struct { {"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 5a58cbf3a75d..c14006ac98f9 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -75,6 +75,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"NETAPP", "INF-01-00", "rdac", }, {"LSI", "INF-01-00", "rdac", }, {"ENGENIO", "INF-01-00", "rdac", }, + {"LENOVO", "DE_Series", "rdac", }, {NULL, NULL, NULL }, }; From 382e06d11e075a40b4094b6ef809f8d4bcc7ab2a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 16:10:52 +0000 Subject: [PATCH 040/116] scsi: storvsc: Fix calculation of sub-channel count When the number of sub-channels offered by Hyper-V is >= the number of CPUs in the VM, calculate the correct number of sub-channels. The current code produces one too many. This scenario arises only when the number of CPUs is artificially restricted (for example, with maxcpus= on the kernel boot line), because Hyper-V normally offers a sub-channel count < number of CPUs. While the current code doesn't break, the extra sub-channel is unbalanced across the CPUs (for example, a total of 5 channels on a VM with 4 CPUs). Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Reviewed-by: Long Li Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 84380bae20f1..e186743033f4 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -668,13 +668,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) { struct device *dev = &device->device; struct storvsc_device *stor_device; - int num_cpus = num_online_cpus(); int num_sc; struct storvsc_cmd_request *request; struct vstor_packet *vstor_packet; int ret, t; - num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns); + /* + * If the number of CPUs is artificially restricted, such as + * with maxcpus=1 on the kernel boot line, Hyper-V could offer + * sub-channels >= the number of CPUs. These sub-channels + * should not be created. The primary channel is already created + * and assigned to one CPU, so check against # CPUs - 1. + */ + num_sc = min((int)(num_online_cpus() - 1), max_chns); + if (!num_sc) + return; + stor_device = get_out_stor_device(device); if (!stor_device) return; From ed2e63aaec4fbf248275d2d83c4cfd65e069594f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 21:42:06 +0000 Subject: [PATCH 041/116] scsi: storvsc: Reduce default ring buffer size to 128 Kbytes Reduce the default VMbus channel ring buffer size for storvsc SCSI devices from 1 Mbyte to 128 Kbytes. Measurements show that ring buffer sizes above 128 Kbytes do not increase performance even at very high IOPS rates, so don't waste the memory. Also remove the dependence on PAGE_SIZE, since the ring buffer size should not change on architectures where PAGE_SIZE is not 4 Kbytes. Signed-off-by: Michael Kelley Reviewed-by: Haiyang Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e186743033f4..8472de1007ff 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -385,7 +385,7 @@ enum storvsc_request_type { * This is the end of Protocol specific defines. */ -static int storvsc_ringbuffer_size = (256 * PAGE_SIZE); +static int storvsc_ringbuffer_size = (128 * 1024); static u32 max_outstanding_req_per_channel; static int storvsc_vcpus_per_sub_channel = 4; From 4eb01535886644d79a58fe652e0782a194bc3402 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 3 Apr 2019 11:10:34 -0700 Subject: [PATCH 042/116] scsi: lpfc: Fix missing wakeups on abort threads Abort thread wakeups, on some wqe types, are not happening. The thread wakeup logic is dependent upon the LPFC_DRIVER_ABORTED flag. However, on these wqes, the completion handler running prior to the io completion routine ends up clearing the flag. Rework the wakeup logic to look at a non-null waitq element which must be set if the abort thread is waiting. This is reverting the change in the indicated patch. Fixes: c2017260eea2d ("scsi: lpfc: Rework locking on SCSI io completion") Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c98f264f1d83..a497b2c0cb79 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3878,10 +3878,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, * wake up the thread. */ spin_lock(&lpfc_cmd->buf_lock); - if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) { - lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; - if (lpfc_cmd->waitq) - wake_up(lpfc_cmd->waitq); + lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; + if (lpfc_cmd->waitq) { + wake_up(lpfc_cmd->waitq); lpfc_cmd->waitq = NULL; } spin_unlock(&lpfc_cmd->buf_lock); From 2201f31f2c6d6030cbd2f7085455e2172725b1c5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:19:25 -0700 Subject: [PATCH 043/116] xtensa: use actual syscall number in do_syscall_trace_leave Syscall may alter pt_regs structure passed to it, resulting in a mismatch between syscall entry end syscall exit entries in the ftrace. Temporary restore syscall field of the pt_regs for the duration of do_syscall_trace_leave. Signed-off-by: Max Filippov --- arch/xtensa/kernel/entry.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e50f5124dc6f..e54af8b7e0f8 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1860,6 +1860,8 @@ ENTRY(system_call) l32i a7, a2, PT_SYSCALL 1: + s32i a7, a1, 4 + /* syscall = sys_call_table[syscall_nr] */ movi a4, sys_call_table @@ -1893,8 +1895,12 @@ ENTRY(system_call) retw 1: + l32i a4, a1, 4 + l32i a3, a2, PT_SYSCALL + s32i a4, a2, PT_SYSCALL mov a6, a2 call4 do_syscall_trace_leave + s32i a3, a2, PT_SYSCALL retw ENDPROC(system_call) From 2663147dc7465cb29040a05cc4286fdd839978b5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:22:42 -0700 Subject: [PATCH 044/116] xtensa: fix initialization of pt_regs::syscall in start_thread New pt_regs should indicate that there's no syscall, not that there's syscall #0. While at it wrap macro body in do/while and parenthesize macro arguments. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index f7dd895b2353..0c14018d1c26 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -187,15 +187,18 @@ struct thread_struct { /* Clearing a0 terminates the backtrace. */ #define start_thread(regs, new_pc, new_sp) \ - memset(regs, 0, sizeof(*regs)); \ - regs->pc = new_pc; \ - regs->ps = USER_PS_VALUE; \ - regs->areg[1] = new_sp; \ - regs->areg[0] = 0; \ - regs->wmask = 1; \ - regs->depc = 0; \ - regs->windowbase = 0; \ - regs->windowstart = 1; + do { \ + memset((regs), 0, sizeof(*(regs))); \ + (regs)->pc = (new_pc); \ + (regs)->ps = USER_PS_VALUE; \ + (regs)->areg[1] = (new_sp); \ + (regs)->areg[0] = 0; \ + (regs)->wmask = 1; \ + (regs)->depc = 0; \ + (regs)->windowbase = 0; \ + (regs)->windowstart = 1; \ + (regs)->syscall = NO_SYSCALL; \ + } while (0) /* Forward declaration */ struct task_struct; From ba5e60c9b75dec92d4c695b928f69300b17d7686 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Tue, 2 Apr 2019 22:12:38 +0800 Subject: [PATCH 045/116] arm/mach-at91/pm : fix possible object reference leak of_find_device_by_node() takes a reference to the struct device when it finds a match via get_device. When returning error we should call put_device. Reviewed-by: Mukesh Ojha Signed-off-by: Peng Hao Signed-off-by: Ludovic Desroches --- arch/arm/mach-at91/pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 51e808adb00c..2a757dcaa1a5 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void) np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam"); if (!np) - goto securam_fail; + goto securam_fail_no_ref_dev; pdev = of_find_device_by_node(np); of_node_put(np); if (!pdev) { pr_warn("%s: failed to find securam device!\n", __func__); - goto securam_fail; + goto securam_fail_no_ref_dev; } sram_pool = gen_pool_get(&pdev->dev, NULL); @@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void) return 0; securam_fail: + put_device(&pdev->dev); +securam_fail_no_ref_dev: iounmap(pm_data.sfrbu); pm_data.sfrbu = NULL; return ret; From 631b7abacd02b88f4b0795c08b54ad4fc3e7c7c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:35 -0500 Subject: [PATCH 046/116] ptrace: Remove maxargs from task_current_syscall() task_current_syscall() has a single user that passes in 6 for maxargs, which is the maximum arguments that can be used to get system calls from syscall_get_arguments(). Instead of passing in a number of arguments to grab, just get 6 arguments. The args argument even specifies that it's an array of 6 items. This will also allow changing syscall_get_arguments() to not get a variable number of arguments, but always grab 6. Linus also suggested not passing in a bunch of arguments to task_current_syscall() but to instead pass in a pointer to a structure, and just fill the structure. struct seccomp_data has almost all the parameters that is needed except for the stack pointer (sp). As seccomp_data is part of uapi, and I'm afraid to change it, a new structure was created "syscall_info", which includes seccomp_data and adds the "sp" field. Link: http://lkml.kernel.org/r/20161107213233.466776454@goodmis.org Cc: Andy Lutomirski Cc: Alexey Dobriyan Cc: Oleg Nesterov Cc: Kees Cook Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- fs/proc/base.c | 17 +++++++------ include/linux/ptrace.h | 11 +++++--- lib/syscall.c | 57 ++++++++++++++++++------------------------ 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index ddef482f1334..6a803a0b75df 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -616,24 +616,25 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - long nr; - unsigned long args[6], sp, pc; + struct syscall_info info; + u64 *args = &info.data.args[0]; int res; res = lock_trace(task); if (res) return res; - if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + if (task_current_syscall(task, &info)) seq_puts(m, "running\n"); - else if (nr < 0) - seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + else if (info.data.nr < 0) + seq_printf(m, "%d 0x%llx 0x%llx\n", + info.data.nr, info.sp, info.data.instruction_pointer); else seq_printf(m, - "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", - nr, + "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", + info.data.nr, args[0], args[1], args[2], args[3], args[4], args[5], - sp, pc); + info.sp, info.data.instruction_pointer); unlock_trace(task); return 0; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index edb9b040c94c..d5084ebd9f03 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -9,6 +9,13 @@ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. */ #include +#include + +/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */ +struct syscall_info { + __u64 sp; + struct seccomp_data data; +}; extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); @@ -407,9 +414,7 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif -extern int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc); +extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); #endif diff --git a/lib/syscall.c b/lib/syscall.c index 1a7077f20eae..e8467e17b9a2 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -5,16 +5,14 @@ #include #include -static int collect_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +static int collect_syscall(struct task_struct *target, struct syscall_info *info) { struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ - *sp = *pc = 0; - *callno = -1; + memset(info, 0, sizeof(*info)); + info->data.nr = -1; return 0; } @@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno, return -EAGAIN; } - *sp = user_stack_pointer(regs); - *pc = instruction_pointer(regs); + info->sp = user_stack_pointer(regs); + info->data.instruction_pointer = instruction_pointer(regs); - *callno = syscall_get_nr(target, regs); - if (*callno != -1L && maxargs > 0) - syscall_get_arguments(target, regs, 0, maxargs, args); + info->data.nr = syscall_get_nr(target, regs); + if (info->data.nr != -1L) + syscall_get_arguments(target, regs, 0, 6, + (unsigned long *)&info->data.args[0]); put_task_stack(target); return 0; @@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno, /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine - * @callno: filled with system call number or -1 - * @args: filled with @maxargs system call arguments - * @maxargs: number of elements in @args to fill - * @sp: filled with user stack pointer - * @pc: filled with user PC + * @info: structure with the following fields: + * .sp - filled with user stack pointer + * .data.nr - filled with system call number or -1 + * .data.args - filled with @maxargs system call arguments + * .data.instruction_pointer - filled with user PC * - * If @target is blocked in a system call, returns zero with *@callno - * set to the the call's number and @args filled in with its arguments. - * Registers not used for system call arguments may not be available and - * it is not kosher to use &struct user_regset calls while the system + * If @target is blocked in a system call, returns zero with @info.data.nr + * set to the the call's number and @info.data.args filled in with its + * arguments. Registers not used for system call arguments may not be available + * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, - * returns zero with *@callno set to -1 and does not fill in @args. - * If so, it's now safe to examine @target using &struct user_regset - * get() calls as long as we're sure @target won't return to user mode. + * returns zero with *@info.data.nr set to -1 and does not fill in + * @info.data.args. If so, it's now safe to examine @target using + * &struct user_regset get() calls as long as we're sure @target won't return + * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. - * - * Returns -%EINVAL if @maxargs is too large (maximum is six). */ -int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +int task_current_syscall(struct task_struct *target, struct syscall_info *info) { long state; unsigned long ncsw; - if (unlikely(maxargs > 6)) - return -EINVAL; - if (target == current) - return collect_syscall(target, callno, args, maxargs, sp, pc); + return collect_syscall(target, info); state = target->state; if (unlikely(!state)) @@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno, ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || - unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || + unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; From d08e411397cb6fcb3d3fb075c27a41975c99e88f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:36 -0500 Subject: [PATCH 047/116] tracing/syscalls: Pass in hardcoded 6 into syscall_get_arguments() The only users that calls syscall_get_arguments() with a variable and not a hard coded '6' is ftrace_syscall_enter(). syscall_get_arguments() can be optimized by removing a variable input, and always grabbing 6 arguments regardless of what the system call actually uses. Change ftrace_syscall_enter() to pass the 6 args into a local stack array and copy the necessary arguments into the trace event as needed. This is needed to remove two parameters from syscall_get_arguments(). Link: http://lkml.kernel.org/r/20161107213233.627583542@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_syscalls.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f93a56d2db27..e9f5bbbad6d9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct ring_buffer_event *event; struct ring_buffer *buffer; unsigned long irq_flags; + unsigned long args[6]; int pc; int syscall_nr; int size; @@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry = ring_buffer_event_data(event); entry->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); + syscall_get_arguments(current, regs, 0, 6, args); + memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); @@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; + unsigned long args[6]; bool valid_prog_array; int syscall_nr; int rctx; @@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) return; rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, - (unsigned long *)&rec->args); + syscall_get_arguments(current, regs, 0, 6, args); + memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || From 10a16997db3d99fc02c026cf2c6e6c670acafab0 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 29 Mar 2019 20:12:21 +0300 Subject: [PATCH 048/116] riscv: Fix syscall_get_arguments() and syscall_set_arguments() RISC-V syscall arguments are located in orig_a0,a1..a5 fields of struct pt_regs. Due to an off-by-one bug and a bug in pointer arithmetic syscall_get_arguments() was reading s3..s7 fields instead of a1..a5. Likewise, syscall_set_arguments() was writing s3..s7 fields instead of a1..a5. Link: http://lkml.kernel.org/r/20190329171221.GA32456@altlinux.org Fixes: e2c0cdfba7f69 ("RISC-V: User-facing API") Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Cc: stable@vger.kernel.org # v4.15+ Acked-by: Palmer Dabbelt Signed-off-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/riscv/include/asm/syscall.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index bba3da6ef157..6ea9e1804233 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -79,10 +79,11 @@ static inline void syscall_get_arguments(struct task_struct *task, if (i == 0) { args[0] = regs->orig_a0; args++; - i++; n--; + } else { + i--; } - memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0])); + memcpy(args, ®s->a1 + i, n * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, @@ -94,10 +95,11 @@ static inline void syscall_set_arguments(struct task_struct *task, if (i == 0) { regs->orig_a0 = args[0]; args++; - i++; n--; - } - memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); + } else { + i--; + } + memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); } static inline int syscall_get_arch(void) From ed3bb007021b9bddb90afae28a19f08ed8890add Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 29 Mar 2019 20:12:30 +0300 Subject: [PATCH 049/116] csky: Fix syscall_get_arguments() and syscall_set_arguments() C-SKY syscall arguments are located in orig_a0,a1,a2,a3,regs[0],regs[1] fields of struct pt_regs. Due to an off-by-one bug and a bug in pointer arithmetic syscall_get_arguments() was reading orig_a0,regs[1..5] fields instead. Likewise, syscall_set_arguments() was writing orig_a0,regs[1..5] fields instead. Link: http://lkml.kernel.org/r/20190329171230.GB32456@altlinux.org Fixes: 4859bfca11c7d ("csky: System Call") Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: stable@vger.kernel.org # v4.20+ Tested-by: Guo Ren Acked-by: Guo Ren Signed-off-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/csky/include/asm/syscall.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index d637445737b7..9a9cd81e66c1 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -49,10 +49,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, if (i == 0) { args[0] = regs->orig_a0; args++; - i++; n--; + } else { + i--; } - memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0])); + memcpy(args, ®s->a1 + i, n * sizeof(args[0])); } static inline void @@ -63,10 +64,11 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, if (i == 0) { regs->orig_a0 = args[0]; args++; - i++; n--; + } else { + i--; } - memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); + memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); } static inline int From bcc816dfe51ab86ca94663c7b225f2d6eb0fddb9 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 4 Apr 2019 10:57:44 +0800 Subject: [PATCH 050/116] blk-mq: do not reset plug->rq_count before the list is sorted We would never be able to sort the list if we first reset plug->rq_count which is used in conditional check later. Fixes: ce5b009cff19 ("block: improve logic around when to sort a plug list") Reviewed-by: Ming Lei Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 22074a1e37cd..ac61bcc67a89 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1711,11 +1711,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) unsigned int depth; list_splice_init(&plug->mq_list, &list); - plug->rq_count = 0; if (plug->rq_count > 2 && plug->multiple_queues) list_sort(NULL, &list, plug_rq_cmp); + plug->rq_count = 0; + this_q = NULL; this_hctx = NULL; this_ctx = NULL; From 1c41860864c8ae0387ef7d44f0000e99cbb2e06d Mon Sep 17 00:00:00 2001 From: Wei Li Date: Mon, 1 Apr 2019 11:55:57 +0800 Subject: [PATCH 051/116] arm64: fix wrong check of on_sdei_stack in nmi context When doing unwind_frame() in the context of pseudo nmi (need enable CONFIG_ARM64_PSEUDO_NMI), reaching the bottom of the stack (fp == 0, pc != 0), function on_sdei_stack() will return true while the sdei acpi table is not inited in fact. This will cause a "NULL pointer dereference" oops when going on. Reviewed-by: Julien Thierry Signed-off-by: Wei Li Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sdei.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 5ba4465e44f0..ea94cf8f9dc6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; @@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; From ada770b1e74a77fff2d5f539bf6c42c25f4784db Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Apr 2019 11:08:40 -0700 Subject: [PATCH 052/116] xtensa: fix return_address return_address returns the address that is one level higher in the call stack than requested in its argument, because level 0 corresponds to its caller's return address. Use requested level as the number of stack frames to skip. This fixes the address reported by might_sleep and friends. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 174c11f13bba..b9f82510c650 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -253,10 +253,14 @@ static int return_address_cb(struct stackframe *frame, void *data) return 1; } +/* + * level == 0 is for the return address from the caller of this function, + * not from this function itself. + */ unsigned long return_address(unsigned level) { struct return_addr_data r = { - .skip = level + 1, + .skip = level, }; walk_stackframe(stack_pointer(NULL), return_address_cb, &r); return r.addr; From bcb44433bba5eaff293888ef22ffa07f1f0347d6 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 3 Apr 2019 12:23:11 -0400 Subject: [PATCH 053/116] dm: disable DISCARD if the underlying storage no longer supports it Storage devices which report supporting discard commands like WRITE_SAME_16 with unmap, but reject discard commands sent to the storage device. This is a clear storage firmware bug but it doesn't change the fact that should a program cause discards to be sent to a multipath device layered on this buggy storage, all paths can end up failed at the same time from the discards, causing possible I/O loss. The first discard to a path will fail with Illegal Request, Invalid field in cdb, e.g.: kernel: sd 8:0:8:19: [sdfn] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE kernel: sd 8:0:8:19: [sdfn] tag#0 Sense Key : Illegal Request [current] kernel: sd 8:0:8:19: [sdfn] tag#0 Add. Sense: Invalid field in cdb kernel: sd 8:0:8:19: [sdfn] tag#0 CDB: Write same(16) 93 08 00 00 00 00 00 a0 08 00 00 00 80 00 00 00 kernel: blk_update_request: critical target error, dev sdfn, sector 10487808 The SCSI layer converts this to the BLK_STS_TARGET error number, the sd device disables its support for discard on this path, and because of the BLK_STS_TARGET error multipath fails the discard without failing any path or retrying down a different path. But subsequent discards can cause path failures. Any discards sent to the path which already failed a discard ends up failing with EIO from blk_cloned_rq_check_limits with an "over max size limit" error since the discard limit was set to 0 by the sd driver for the path. As the error is EIO, this now fails the path and multipath tries to send the discard down the next path. This cycle continues as discards are sent until all paths fail. Fix this by training DM core to disable DISCARD if the underlying storage already did so. Also, fix branching in dm_done() and clone_endio() to reflect the mutually exclussive nature of the IO operations in question. Cc: stable@vger.kernel.org Reported-by: David Jeffery Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-rq.c | 11 +++++++---- drivers/md/dm.c | 20 ++++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 95c6d86ab5e8..c4ef1fceead6 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -115,6 +115,7 @@ struct mapped_device { struct srcu_struct io_barrier; }; +void disable_discard(struct mapped_device *md); void disable_write_same(struct mapped_device *md); void disable_write_zeroes(struct mapped_device *md); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 09773636602d..b66745bd08bb 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -222,11 +222,14 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped) } if (unlikely(error == BLK_STS_TARGET)) { - if (req_op(clone) == REQ_OP_WRITE_SAME && - !clone->q->limits.max_write_same_sectors) + if (req_op(clone) == REQ_OP_DISCARD && + !clone->q->limits.max_discard_sectors) + disable_discard(tio->md); + else if (req_op(clone) == REQ_OP_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) disable_write_same(tio->md); - if (req_op(clone) == REQ_OP_WRITE_ZEROES && - !clone->q->limits.max_write_zeroes_sectors) + else if (req_op(clone) == REQ_OP_WRITE_ZEROES && + !clone->q->limits.max_write_zeroes_sectors) disable_write_zeroes(tio->md); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 89b04169658d..043f0761e4a0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -945,6 +945,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error) } } +void disable_discard(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support DISCARD, disable it */ + limits->max_discard_sectors = 0; + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue); +} + void disable_write_same(struct mapped_device *md) { struct queue_limits *limits = dm_get_queue_limits(md); @@ -970,11 +979,14 @@ static void clone_endio(struct bio *bio) dm_endio_fn endio = tio->ti->type->end_io; if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { - if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bio->bi_disk->queue->limits.max_write_same_sectors) + if (bio_op(bio) == REQ_OP_DISCARD && + !bio->bi_disk->queue->limits.max_discard_sectors) + disable_discard(md); + else if (bio_op(bio) == REQ_OP_WRITE_SAME && + !bio->bi_disk->queue->limits.max_write_same_sectors) disable_write_same(md); - if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bio->bi_disk->queue->limits.max_write_zeroes_sectors) + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) disable_write_zeroes(md); } From d9b8a67b3b95a5c5aae6422b8113adc1c2485f2b Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sun, 3 Mar 2019 15:04:18 +0800 Subject: [PATCH 054/116] mtd: cfi: fix deadloop in cfi_cmdset_0002.c do_write_buffer In function do_write_buffer(), in the for loop, there is a case chip_ready() returns 1 while chip_good() returns 0, so it never break the loop. To fix this, chip_good() is enough and it should timeout if it stay bad for a while. Fixes: dfeae1073583("mtd: cfi_cmdset_0002: Change write buffer to check correct value") Signed-off-by: Yi Huaijie Signed-off-by: Liu Jian Reviewed-by: Tokunori Ikegami Signed-off-by: Richard Weinberger --- drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 72428b6bfc47..7b7286b4d81e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1876,7 +1876,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, continue; } - if (time_after(jiffies, timeo) && !chip_ready(map, adr)) + /* + * We check "time_after" and "!chip_good" before checking "chip_good" to avoid + * the failure due to scheduling. + */ + if (time_after(jiffies, timeo) && !chip_good(map, adr, datum)) break; if (chip_good(map, adr, datum)) { From ecae26fae15abb7d433557afbd15467ce1c444f5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Apr 2019 18:42:05 -0700 Subject: [PATCH 055/116] xtensa: fix format string warning in init_pmd Use %lu instead of %zu to fix the following warning introduced with recent memblock refactoring: xtensa/mm/mmu.c:36:9: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int Signed-off-by: Max Filippov --- arch/xtensa/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 2fb7d1172228..03678c4afc39 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -33,7 +33,7 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages) pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE); if (!pte) - panic("%s: Failed to allocate %zu bytes align=%lx\n", + panic("%s: Failed to allocate %lu bytes align=%lx\n", __func__, n_pages * sizeof(pte_t), PAGE_SIZE); for (i = 0; i < n_pages; ++i) From ad94dc3a7eb5fa6ff469dbcf401c44b14ad50595 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 3 Apr 2019 07:26:36 +0200 Subject: [PATCH 056/116] xen: use struct_size() helper in kzalloc() struct privcmd_buf_vma_private has a zero-sized array at the end (pages), use the new struct_size() helper to determine the proper allocation size and avoid potential type mistakes. Signed-off-by: Andrea Righi Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/privcmd-buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c index de01a6d0059d..a1c61e351d3f 100644 --- a/drivers/xen/privcmd-buf.c +++ b/drivers/xen/privcmd-buf.c @@ -140,8 +140,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *), - GFP_KERNEL); + vma_priv = kzalloc(struct_size(vma_priv, pages, count), GFP_KERNEL); if (!vma_priv) return -ENOMEM; From 42d8644bd77dd2d747e004e367cb0c895a606f39 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Apr 2019 18:12:17 +0300 Subject: [PATCH 057/116] xen: Prevent buffer overflow in privcmd ioctl The "call" variable comes from the user in privcmd_ioctl_hypercall(). It's an offset into the hypercall_page[] which has (PAGE_SIZE / 32) elements. We need to put an upper bound on it to prevent an out of bounds access. Cc: stable@vger.kernel.org Fixes: 1246ae0bb992 ("xen: add variable hypercall caller") Signed-off-by: Dan Carpenter Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index de6f0d59a24f..2863c2026655 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -206,6 +206,9 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM : [thunk_target] "a" (&hypercall_page[call]) From b35f549df1d7520d37ba1e6d4a8d4df6bd52d136 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:37 -0500 Subject: [PATCH 058/116] syscalls: Remove start and number from syscall_get_arguments() args At Linux Plumbers, Andy Lutomirski approached me and pointed out that the function call syscall_get_arguments() implemented in x86 was horribly written and not optimized for the standard case of passing in 0 and 6 for the starting index and the number of system calls to get. When looking at all the users of this function, I discovered that all instances pass in only 0 and 6 for these arguments. Instead of having this function handle different cases that are never used, simply rewrite it to return the first 6 arguments of a system call. This should help out the performance of tracing system calls by ptrace, ftrace and perf. Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Dominik Brodowski Cc: Dave Martin Cc: "Dmitry V. Levin" Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Paul Burton # MIPS parts Acked-by: Max Filippov # For xtensa changes Acked-by: Will Deacon # For the arm64 bits Reviewed-by: Thomas Gleixner # for x86 Reviewed-by: Dmitry V. Levin Reported-by: Andy Lutomirski Signed-off-by: Steven Rostedt (VMware) --- arch/arc/include/asm/syscall.h | 7 ++- arch/arm/include/asm/syscall.h | 23 ++------- arch/arm64/include/asm/syscall.h | 22 ++------ arch/c6x/include/asm/syscall.h | 41 +++------------ arch/csky/include/asm/syscall.h | 14 ++--- arch/h8300/include/asm/syscall.h | 34 +++---------- arch/hexagon/include/asm/syscall.h | 4 +- arch/ia64/include/asm/syscall.h | 5 +- arch/microblaze/include/asm/syscall.h | 4 +- arch/mips/include/asm/syscall.h | 3 +- arch/mips/kernel/ptrace.c | 2 +- arch/nds32/include/asm/syscall.h | 33 +++--------- arch/nios2/include/asm/syscall.h | 42 +++------------ arch/openrisc/include/asm/syscall.h | 6 +-- arch/parisc/include/asm/syscall.h | 30 +++-------- arch/powerpc/include/asm/syscall.h | 8 ++- arch/riscv/include/asm/syscall.h | 13 ++--- arch/s390/include/asm/syscall.h | 17 ++----- arch/sh/include/asm/syscall_32.h | 26 +++------- arch/sh/include/asm/syscall_64.h | 4 +- arch/sparc/include/asm/syscall.h | 4 +- arch/um/include/asm/syscall-generic.h | 39 +++----------- arch/x86/include/asm/syscall.h | 73 +++++++-------------------- arch/xtensa/include/asm/syscall.h | 16 ++---- include/asm-generic/syscall.h | 11 ++-- include/trace/events/syscalls.h | 2 +- kernel/seccomp.c | 2 +- kernel/trace/trace_syscalls.c | 4 +- lib/syscall.c | 2 +- 29 files changed, 113 insertions(+), 378 deletions(-) diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h index 29de09804306..c7a4201ed62b 100644 --- a/arch/arc/include/asm/syscall.h +++ b/arch/arc/include/asm/syscall.h @@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { unsigned long *inside_ptregs = &(regs->r0); - inside_ptregs -= i; - - BUG_ON((i + n) > 6); + unsigned int n = 6; + unsigned int i = 0; while (n--) { args[i++] = (*inside_ptregs); diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 06dea6bce293..db969a2972ae 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -55,29 +55,12 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; + args[0] = regs->ARM_ORIG_r0; + args++; - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - args[0] = regs->ARM_ORIG_r0; - args++; - i++; - n--; - } - - memcpy(args, ®s->ARM_r0 + i, n * sizeof(args[0])); + memcpy(args, ®s->ARM_r0 + 1, 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9..55b2dab21023 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -65,28 +65,12 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; + args[0] = regs->orig_x0; + args++; - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_x0; - args++; - i++; - n--; - } - - memcpy(args, ®s->regs[i], n * sizeof(args[0])); + memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index ae2be315ee9c..06db3251926b 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -46,40 +46,15 @@ static inline void syscall_set_return_value(struct task_struct *task, } static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, - unsigned int n, unsigned long *args) + struct pt_regs *regs, + unsigned long *args) { - switch (i) { - case 0: - if (!n--) - break; - *args++ = regs->a4; - case 1: - if (!n--) - break; - *args++ = regs->b4; - case 2: - if (!n--) - break; - *args++ = regs->a6; - case 3: - if (!n--) - break; - *args++ = regs->b6; - case 4: - if (!n--) - break; - *args++ = regs->a8; - case 5: - if (!n--) - break; - *args++ = regs->b8; - case 6: - if (!n--) - break; - default: - BUG(); - } + *args++ = regs->a4; + *args++ = regs->b4; + *args++ = regs->a6; + *args++ = regs->b6; + *args++ = regs->a8; + *args = regs->b8; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index 9a9cd81e66c1..c691fe92edc6 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -43,17 +43,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - args[0] = regs->orig_a0; - args++; - n--; - } else { - i--; - } - memcpy(args, ®s->a1 + i, n * sizeof(args[0])); + args[0] = regs->orig_a0; + args++; + memcpy(args, ®s->a1, 5 * sizeof(args[0])); } static inline void diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h index 924990401237..ddd483c6ca95 100644 --- a/arch/h8300/include/asm/syscall.h +++ b/arch/h8300/include/asm/syscall.h @@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - - while (n > 0) { - switch (i) { - case 0: - *args++ = regs->er1; - break; - case 1: - *args++ = regs->er2; - break; - case 2: - *args++ = regs->er3; - break; - case 3: - *args++ = regs->er4; - break; - case 4: - *args++ = regs->er5; - break; - case 5: - *args++ = regs->er6; - break; - } - i++; - n--; - } + *args++ = regs->er1; + *args++ = regs->er2; + *args++ = regs->er3; + *args++ = regs->er4; + *args++ = regs->er5; + *args = regs->er6; } diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h index 4af9c7b6f13a..ae3a1e24fabd 100644 --- a/arch/hexagon/include/asm/syscall.h +++ b/arch/hexagon/include/asm/syscall.h @@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, &(®s->r00)[i], n * sizeof(args[0])); + memcpy(args, &(®s->r00)[0], 6 * sizeof(args[0])); } #endif diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 1d0b875fec44..8204c1ff70ce 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -63,12 +63,9 @@ extern void ia64_syscall_get_set_arguments(struct task_struct *task, unsigned long *args, int rw); static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - - ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); + ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 220decd605a4..4b23e44e5c3c 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -82,9 +82,11 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; + while (n--) *args++ = microblaze_get_syscall_arg(regs, i++); } diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6cf8ffb5367e..a2b4748655df 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; int ret; /* O32 ABI syscall() */ diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0057c910bc2f..3a62f80958e1 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) sd.nr = syscall; sd.arch = syscall_get_arch(); - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) sd.args[i] = args[i]; sd.instruction_pointer = KSTK_EIP(current); diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index f7e5e86765fe..89a6ec8731d8 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -108,42 +108,21 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * syscall_get_arguments - extract system call parameter values * @task: task of interest, must be blocked * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array filled with argument values * - * Fetches @n arguments to the system call starting with the @i'th argument - * (from 0 through 5). Argument @i is stored in @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Fetches 6 arguments to the system call (from 0 through 5). The first + * argument is stored in @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ #define SYSCALL_MAX_ARGS 6 void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - if (n == 0) - return; - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_r0; - args++; - i++; - n--; - } - - memcpy(args, ®s->uregs[0] + i, n * sizeof(args[0])); + args[0] = regs->orig_r0; + args++; + memcpy(args, ®s->uregs[0] + 1, 5 * sizeof(args[0])); } /** diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 9de220854c4a..792bd449d839 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -58,42 +58,14 @@ static inline void syscall_set_return_value(struct task_struct *task, } static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args) + struct pt_regs *regs, unsigned long *args) { - BUG_ON(i + n > 6); - - switch (i) { - case 0: - if (!n--) - break; - *args++ = regs->r4; - case 1: - if (!n--) - break; - *args++ = regs->r5; - case 2: - if (!n--) - break; - *args++ = regs->r6; - case 3: - if (!n--) - break; - *args++ = regs->r7; - case 4: - if (!n--) - break; - *args++ = regs->r8; - case 5: - if (!n--) - break; - *args++ = regs->r9; - case 6: - if (!n--) - break; - default: - BUG(); - } + *args++ = regs->r4; + *args++ = regs->r5; + *args++ = regs->r6; + *args++ = regs->r7; + *args++ = regs->r8; + *args = regs->r9; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 2db9f1cf0694..72607860cd55 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -56,11 +56,9 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - - memcpy(args, ®s->gpr[3 + i], n * sizeof(args[0])); + memcpy(args, ®s->gpr[3], 6 * sizeof(args[0])); } static inline void diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h index 8bff1a58c97f..62a6d477fae0 100644 --- a/arch/parisc/include/asm/syscall.h +++ b/arch/parisc/include/asm/syscall.h @@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk, } static inline void syscall_get_arguments(struct task_struct *tsk, - struct pt_regs *regs, unsigned int i, - unsigned int n, unsigned long *args) + struct pt_regs *regs, + unsigned long *args) { - BUG_ON(i); - - switch (n) { - case 6: - args[5] = regs->gr[21]; - case 5: - args[4] = regs->gr[22]; - case 4: - args[3] = regs->gr[23]; - case 3: - args[2] = regs->gr[24]; - case 2: - args[1] = regs->gr[25]; - case 1: - args[0] = regs->gr[26]; - case 0: - break; - default: - BUG(); - } + args[5] = regs->gr[21]; + args[4] = regs->gr[22]; + args[3] = regs->gr[23]; + args[2] = regs->gr[24]; + args[1] = regs->gr[25]; + args[0] = regs->gr[26]; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 1a0e7a8b1c81..5c9b9dc82b7e 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { unsigned long val, mask = -1UL; - - BUG_ON(i + n > 6); + unsigned int n = 6; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_32BIT)) mask = 0xffffffff; #endif while (n--) { - if (n == 0 && i == 0) + if (n == 0) val = regs->orig_gpr3; else - val = regs->gpr[3 + i + n]; + val = regs->gpr[3 + n]; args[n] = val & mask; } diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 6ea9e1804233..6adca1804be1 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -72,18 +72,11 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - args[0] = regs->orig_a0; - args++; - n--; - } else { - i--; - } - memcpy(args, ®s->a1 + i, n * sizeof(args[0])); + args[0] = regs->orig_a0; + args++; + memcpy(args, ®s->a1, 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 96f9a9151fde..ee0b1f6aa36d 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -56,27 +56,20 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { unsigned long mask = -1UL; + unsigned int n = 6; - /* - * No arguments for this syscall, there's nothing to do. - */ - if (!n) - return; - - BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif while (n-- > 0) - if (i + n > 0) - args[n] = regs->gprs[2 + i + n] & mask; - if (i == 0) - args[0] = regs->orig_gpr2 & mask; + if (n > 0) + args[n] = regs->gprs[2 + n] & mask; + + args[0] = regs->orig_gpr2 & mask; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 6e118799831c..2bf1199a0595 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -48,30 +48,16 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - /* - * Do this simply for now. If we need to start supporting - * fetching arguments from arbitrary indices, this will need some - * extra logic. Presently there are no in-tree users that depend - * on this behaviour. - */ - BUG_ON(i); /* Argument pattern is: R4, R5, R6, R7, R0, R1 */ - switch (n) { - case 6: args[5] = regs->regs[1]; - case 5: args[4] = regs->regs[0]; - case 4: args[3] = regs->regs[7]; - case 3: args[2] = regs->regs[6]; - case 2: args[1] = regs->regs[5]; - case 1: args[0] = regs->regs[4]; - case 0: - break; - default: - BUG(); - } + args[5] = regs->regs[1]; + args[4] = regs->regs[0]; + args[3] = regs->regs[7]; + args[2] = regs->regs[6]; + args[1] = regs->regs[5]; + args[0] = regs->regs[4]; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 43882580c7f9..4e8f6460c703 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -47,11 +47,9 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, ®s->regs[2 + i], n * sizeof(args[0])); + memcpy(args, ®s->regs[2], 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 053989e3f6a6..872dfee852d6 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { int zero_extend = 0; unsigned int j; + unsigned int n = 6; #ifdef CONFIG_SPARC64 if (test_tsk_thread_flag(task, TIF_32BIT)) @@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task, #endif for (j = 0; j < n; j++) { - unsigned long val = regs->u_regs[UREG_I0 + i + j]; + unsigned long val = regs->u_regs[UREG_I0 + j]; if (zero_extend) args[j] = (u32) val; diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 9fb9cf8cd39a..25d00acd1322 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -53,43 +53,16 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { const struct uml_pt_regs *r = ®s->regs; - switch (i) { - case 0: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG1(r); - case 1: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG2(r); - case 2: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG3(r); - case 3: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG4(r); - case 4: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG5(r); - case 5: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG6(r); - case 6: - if (!n--) - break; - default: - BUG(); - break; - } + *args++ = UPT_SYSCALL_ARG1(r); + *args++ = UPT_SYSCALL_ARG2(r); + *args++ = UPT_SYSCALL_ARG3(r); + *args++ = UPT_SYSCALL_ARG4(r); + *args++ = UPT_SYSCALL_ARG5(r); + *args = UPT_SYSCALL_ARG6(r); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af..8dbb5c379450 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, ®s->bx + i, n * sizeof(args[0])); + memcpy(args, ®s->bx, 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, @@ -116,63 +114,26 @@ static inline int syscall_get_arch(void) static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - *args++ = regs->bx; - case 1: - if (!n--) break; - *args++ = regs->cx; - case 2: - if (!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->si; - case 4: - if (!n--) break; - *args++ = regs->di; - case 5: - if (!n--) break; - *args++ = regs->bp; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + *args++ = regs->bx; + *args++ = regs->cx; + *args++ = regs->dx; + *args++ = regs->si; + *args++ = regs->di; + *args = regs->bp; + } else # endif - switch (i) { - case 0: - if (!n--) break; - *args++ = regs->di; - case 1: - if (!n--) break; - *args++ = regs->si; - case 2: - if (!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->r10; - case 4: - if (!n--) break; - *args++ = regs->r8; - case 5: - if (!n--) break; - *args++ = regs->r9; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + *args++ = regs->di; + *args++ = regs->si; + *args++ = regs->dx; + *args++ = regs->r10; + *args++ = regs->r8; + *args = regs->r9; + } } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index a168bf81c7f4..1504ce9a233a 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -59,23 +59,13 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int j; + unsigned int i; - if (n == 0) - return; - - WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS); - - for (j = 0; j < n; ++j) { - if (i + j < SYSCALL_MAX_ARGS) - args[j] = regs->areg[reg[i + j]]; - else - args[j] = 0; - } + for (i = 0; i < 6; ++i) + args[i] = regs->areg[reg[i]]; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 0c938a4354f6..269e9412ef42 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -105,21 +105,16 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * syscall_get_arguments - extract system call parameter values * @task: task of interest, must be blocked * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array filled with argument values * - * Fetches @n arguments to the system call starting with the @i'th argument - * (from 0 through 5). Argument @i is stored in @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Fetches 6 arguments to the system call. First argument is stored in +* @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args); + unsigned long *args); /** * syscall_set_arguments - change system call parameter value diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 44a3259ed4a5..b6e0cbc2c71f 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -28,7 +28,7 @@ TRACE_EVENT_FN(sys_enter, TP_fast_assign( __entry->id = id; - syscall_get_arguments(current, regs, 0, 6, __entry->args); + syscall_get_arguments(current, regs, __entry->args); ), TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54a0347ca812..df27e499956a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) sd->nr = syscall_get_nr(task, regs); sd->arch = syscall_get_arch(); - syscall_get_arguments(task, regs, 0, 6, args); + syscall_get_arguments(task, regs, args); sd->args[0] = args[0]; sd->args[1] = args[1]; sd->args[2] = args[2]; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e9f5bbbad6d9..fa8fbff736d6 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -348,7 +348,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry = ring_buffer_event_data(event); entry->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, @@ -616,7 +616,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) return; rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && diff --git a/lib/syscall.c b/lib/syscall.c index e8467e17b9a2..fb328e7ccb08 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -27,7 +27,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) - syscall_get_arguments(target, regs, 0, 6, + syscall_get_arguments(target, regs, (unsigned long *)&info->data.args[0]); put_task_stack(target); From 32d92586629a8b3637a3c9361709818e25f327ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Mar 2019 20:07:31 -0400 Subject: [PATCH 059/116] syscalls: Remove start and number from syscall_set_arguments() args After removing the start and count arguments of syscall_get_arguments() it seems reasonable to remove them from syscall_set_arguments(). Note, as of today, there are no users of syscall_set_arguments(). But we are told that there will be soon. But for now, at least make it consistent with syscall_get_arguments(). Link: http://lkml.kernel.org/r/20190327222014.GA32540@altlinux.org Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Dominik Brodowski Cc: Dave Martin Cc: "Dmitry V. Levin" Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Max Filippov # For xtensa changes Acked-by: Will Deacon # For the arm64 bits Reviewed-by: Thomas Gleixner # for x86 Reviewed-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/arm/include/asm/syscall.h | 20 ++------ arch/arm64/include/asm/syscall.h | 20 ++------ arch/c6x/include/asm/syscall.h | 38 +++------------ arch/csky/include/asm/syscall.h | 14 ++---- arch/ia64/include/asm/syscall.h | 10 ++-- arch/ia64/kernel/ptrace.c | 7 ++- arch/microblaze/include/asm/syscall.h | 4 +- arch/nds32/include/asm/syscall.h | 29 ++--------- arch/nios2/include/asm/syscall.h | 42 +++------------- arch/openrisc/include/asm/syscall.h | 6 +-- arch/powerpc/include/asm/syscall.h | 7 +-- arch/riscv/include/asm/syscall.h | 13 ++--- arch/s390/include/asm/syscall.h | 11 ++--- arch/sh/include/asm/syscall_32.h | 21 +++----- arch/sh/include/asm/syscall_64.h | 4 +- arch/sparc/include/asm/syscall.h | 7 ++- arch/um/include/asm/syscall-generic.h | 39 +++------------ arch/x86/include/asm/syscall.h | 69 +++++++-------------------- arch/xtensa/include/asm/syscall.h | 17 ++----- include/asm-generic/syscall.h | 10 +--- 20 files changed, 86 insertions(+), 302 deletions(-) diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index db969a2972ae..080ce70cab12 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -65,26 +65,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; + regs->ARM_ORIG_r0 = args[0]; + args++; - if (i + n > SYSCALL_MAX_ARGS) { - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->ARM_ORIG_r0 = args[0]; - args++; - i++; - n--; - } - - memcpy(®s->ARM_r0 + i, args, n * sizeof(args[0])); + memcpy(®s->ARM_r0 + 1, args, 5 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 55b2dab21023..a179df3674a1 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -75,26 +75,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; + regs->orig_x0 = args[0]; + args++; - if (i + n > SYSCALL_MAX_ARGS) { - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_x0 = args[0]; - args++; - i++; - n--; - } - - memcpy(®s->regs[i], args, n * sizeof(args[0])); + memcpy(®s->regs[1], args, 5 * sizeof(args[0])); } /* diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index 06db3251926b..15ba8599858e 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -59,40 +59,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - switch (i) { - case 0: - if (!n--) - break; - regs->a4 = *args++; - case 1: - if (!n--) - break; - regs->b4 = *args++; - case 2: - if (!n--) - break; - regs->a6 = *args++; - case 3: - if (!n--) - break; - regs->b6 = *args++; - case 4: - if (!n--) - break; - regs->a8 = *args++; - case 5: - if (!n--) - break; - regs->a9 = *args++; - case 6: - if (!n) - break; - default: - BUG(); - } + regs->a4 = *args++; + regs->b4 = *args++; + regs->a6 = *args++; + regs->b6 = *args++; + regs->a8 = *args++; + regs->a9 = *args; } #endif /* __ASM_C6X_SYSCALLS_H */ diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index c691fe92edc6..bda0a446c63e 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -52,17 +52,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) + const unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - regs->orig_a0 = args[0]; - args++; - n--; - } else { - i--; - } - memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); + regs->orig_a0 = args[0]; + args++; + memcpy(®s->a1, args, 5 * sizeof(regs->a1)); } static inline int diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 8204c1ff70ce..0d9e7fab4a79 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -59,23 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task, } extern void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args, int rw); + struct pt_regs *regs, unsigned long *args, int rw); static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { - ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0); + ia64_syscall_get_set_arguments(task, regs, args, 0); } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - - ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); + ia64_syscall_get_set_arguments(task, regs, args, 1); } static inline int syscall_get_arch(void) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6d50ede0ed69..bf9c24d9ce84 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) } void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args, int rw) + struct pt_regs *regs, unsigned long *args, int rw) { struct syscall_get_set_args data = { - .i = i, - .n = n, + .i = 0, + .n = 6, .args = args, .regs = regs, .rw = rw, diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 4b23e44e5c3c..833d3a53dab3 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -93,9 +93,11 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; + while (n--) microblaze_set_syscall_arg(regs, i++, *args++); } diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 89a6ec8731d8..671ebd357496 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -129,39 +129,20 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * syscall_set_arguments - change system call parameter value * @task: task of interest, must be in system call entry tracing * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array of argument values to store * - * Changes @n arguments to the system call starting with the @i'th argument. - * Argument @i gets value @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Changes 6 arguments to the system call. The first argument gets value + * @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; + regs->orig_r0 = args[0]; + args++; - if (i + n > SYSCALL_MAX_ARGS) { - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_r0 = args[0]; - args++; - i++; - n--; - } - - memcpy(®s->uregs[0] + i, args, n * sizeof(args[0])); + memcpy(®s->uregs[0] + 1, args, 5 * sizeof(args[0])); } #endif /* _ASM_NDS32_SYSCALL_H */ diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 792bd449d839..d7624ed06efb 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -69,42 +69,14 @@ static inline void syscall_get_arguments(struct task_struct *task, } static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - const unsigned long *args) + struct pt_regs *regs, const unsigned long *args) { - BUG_ON(i + n > 6); - - switch (i) { - case 0: - if (!n--) - break; - regs->r4 = *args++; - case 1: - if (!n--) - break; - regs->r5 = *args++; - case 2: - if (!n--) - break; - regs->r6 = *args++; - case 3: - if (!n--) - break; - regs->r7 = *args++; - case 4: - if (!n--) - break; - regs->r8 = *args++; - case 5: - if (!n--) - break; - regs->r9 = *args++; - case 6: - if (!n) - break; - default: - BUG(); - } + regs->r4 = *args++; + regs->r5 = *args++; + regs->r6 = *args++; + regs->r7 = *args++; + regs->r8 = *args++; + regs->r9 = *args; } #endif diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 72607860cd55..b4ff07c1baed 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -63,11 +63,9 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) + const unsigned long *args) { - BUG_ON(i + n > 6); - - memcpy(®s->gpr[3 + i], args, n * sizeof(args[0])); + memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 5c9b9dc82b7e..1243045bad2d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -86,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(®s->gpr[3 + i], args, n * sizeof(args[0])); + memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); /* Also copy the first argument into orig_gpr3 */ - if (i == 0 && n > 0) - regs->orig_gpr3 = args[0]; + regs->orig_gpr3 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 6adca1804be1..a3d5273ded7c 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -81,18 +81,11 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - regs->orig_a0 = args[0]; - args++; - n--; - } else { - i--; - } - memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); + regs->orig_a0 = args[0]; + args++; + memcpy(®s->a1, args, 5 * sizeof(regs->a1)); } static inline int syscall_get_arch(void) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index ee0b1f6aa36d..59c3e91f2cdb 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -74,15 +74,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); + unsigned int n = 6; + while (n-- > 0) - if (i + n > 0) - regs->gprs[2 + i + n] = args[n]; - if (i == 0) - regs->orig_gpr2 = args[0]; + if (n > 0) + regs->gprs[2 + n] = args[n]; + regs->orig_gpr2 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 2bf1199a0595..8c9d7e5e5dcc 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -62,23 +62,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - /* Same note as above applies */ - BUG_ON(i); - - switch (n) { - case 6: regs->regs[1] = args[5]; - case 5: regs->regs[0] = args[4]; - case 4: regs->regs[7] = args[3]; - case 3: regs->regs[6] = args[2]; - case 2: regs->regs[5] = args[1]; - case 1: regs->regs[4] = args[0]; - break; - default: - BUG(); - } + regs->regs[1] = args[5]; + regs->regs[0] = args[4]; + regs->regs[7] = args[3]; + regs->regs[6] = args[2]; + regs->regs[5] = args[1]; + regs->regs[4] = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 4e8f6460c703..22fad97da066 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -54,11 +54,9 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(®s->regs[2 + i], args, n * sizeof(args[0])); + memcpy(®s->regs[2], args, 6 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 872dfee852d6..4d075434e816 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - unsigned int j; + unsigned int i; - for (j = 0; j < n; j++) - regs->u_regs[UREG_I0 + i + j] = args[j]; + for (i = 0; i < 6; i++) + regs->u_regs[UREG_I0 + i] = args[i]; } static inline int syscall_get_arch(void) diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 25d00acd1322..98e50c50c12e 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -67,43 +67,16 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { struct uml_pt_regs *r = ®s->regs; - switch (i) { - case 0: - if (!n--) - break; - UPT_SYSCALL_ARG1(r) = *args++; - case 1: - if (!n--) - break; - UPT_SYSCALL_ARG2(r) = *args++; - case 2: - if (!n--) - break; - UPT_SYSCALL_ARG3(r) = *args++; - case 3: - if (!n--) - break; - UPT_SYSCALL_ARG4(r) = *args++; - case 4: - if (!n--) - break; - UPT_SYSCALL_ARG5(r) = *args++; - case 5: - if (!n--) - break; - UPT_SYSCALL_ARG6(r) = *args++; - case 6: - if (!n--) - break; - default: - BUG(); - break; - } + UPT_SYSCALL_ARG1(r) = *args++; + UPT_SYSCALL_ARG2(r) = *args++; + UPT_SYSCALL_ARG3(r) = *args++; + UPT_SYSCALL_ARG4(r) = *args++; + UPT_SYSCALL_ARG5(r) = *args++; + UPT_SYSCALL_ARG6(r) = *args; } /* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8dbb5c379450..4c305471ec33 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -138,63 +138,26 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - regs->bx = *args++; - case 1: - if (!n--) break; - regs->cx = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->si = *args++; - case 4: - if (!n--) break; - regs->di = *args++; - case 5: - if (!n--) break; - regs->bp = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + regs->bx = *args++; + regs->cx = *args++; + regs->dx = *args++; + regs->si = *args++; + regs->di = *args++; + regs->bp = *args; + } else # endif - switch (i) { - case 0: - if (!n--) break; - regs->di = *args++; - case 1: - if (!n--) break; - regs->si = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->r10 = *args++; - case 4: - if (!n--) break; - regs->r8 = *args++; - case 5: - if (!n--) break; - regs->r9 = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + regs->di = *args++; + regs->si = *args++; + regs->dx = *args++; + regs->r10 = *args++; + regs->r8 = *args++; + regs->r9 = *args; + } } static inline int syscall_get_arch(void) diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 1504ce9a233a..91dc06d58060 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -70,24 +70,13 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int j; + unsigned int i; - if (n == 0) - return; - - if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) { - if (i < SYSCALL_MAX_ARGS) - n = SYSCALL_MAX_ARGS - i; - else - return; - } - - for (j = 0; j < n; ++j) - regs->areg[reg[i + j]] = args[j]; + for (i = 0; i < 6; ++i) + regs->areg[reg[i]] = args[i]; } asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 269e9412ef42..b88239e9efe4 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -120,21 +120,15 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * syscall_set_arguments - change system call parameter value * @task: task of interest, must be in system call entry tracing * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array of argument values to store * - * Changes @n arguments to the system call starting with the @i'th argument. - * Argument @i gets value @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Changes 6 arguments to the system call. + * The first argument gets value @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args); /** From f0d1762554014ce0ae347b9f0d088f2c157c8c72 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 5 Apr 2019 10:14:58 +0800 Subject: [PATCH 060/116] paride/pcd: Fix potential NULL pointer dereference and mem leak Syzkaller report this: pcd: pcd version 1.07, major 46, nice 0 pcd0: Autoprobe failed pcd: No CD-ROM drive found kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4525 Comm: syz-executor.0 Not tainted 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pcd_init+0x95c/0x1000 [pcd] Code: c4 ab f7 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 56 a3 da f7 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 39 a3 da f7 49 8b bc 24 80 05 00 00 e8 cc b2 RSP: 0018:ffff8881e84df880 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc155a088 RCX: ffffffffc1508935 RDX: 0000000000040000 RSI: ffffc900014f0000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee658b8 R09: ffffed103ee658b8 R10: 0000000000000001 R11: ffffed103ee658b7 R12: 0000000000000000 R13: ffffffffc155a778 R14: ffffffffc155a4a8 R15: 0000000000000003 FS: 00007fe71bee3700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a7334441a8 CR3: 00000001e9674003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1508000 ? 0xffffffffc1508000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe71bee2c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007fe71bee2c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe71bee36bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pcd(+) paride solos_pci atm ts_fsm rtc_mt6397 mac80211 nhc_mobility nhc_udp nhc_ipv6 nhc_hop nhc_dest nhc_fragment nhc_routing 6lowpan rtc_cros_ec memconsole intel_xhci_usb_role_switch roles rtc_wm8350 usbcore industrialio_triggered_buffer kfifo_buf industrialio asc7621 dm_era dm_persistent_data dm_bufio dm_mod tpm gnss_ubx gnss_serial serdev gnss max2165 cpufreq_dt hid_penmount hid menf21bmc_wdt rc_core n_tracesink ide_gd_mod cdns_csi2tx v4l2_fwnode videodev media pinctrl_lewisburg pinctrl_intel iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 crypto_simd ide_pci_generic piix input_leds cryptd glue_helper psmouse ide_core intel_agp serio_raw intel_gtt ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: bmc150_magn] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace d873691c3cd69f56 ]--- If alloc_disk fails in pcd_init_units, cd->disk will be NULL, however in pcd_detect and pcd_exit, it's not check this before free.It may result a NULL pointer dereference. Also when register_blkdev failed, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources. Reported-by: Hulk Robot Fixes: 81b74ac68c28 ("paride/pcd: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 377a694dc228..6d415b20fb70 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -314,6 +314,7 @@ static void pcd_init_units(void) disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, 1, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(disk->queue)) { + put_disk(disk); disk->queue = NULL; continue; } @@ -750,6 +751,8 @@ static int pcd_detect(void) printk("%s: No CD-ROM drive found\n", name); for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; blk_cleanup_queue(cd->disk->queue); cd->disk->queue = NULL; blk_mq_free_tag_set(&cd->tag_set); @@ -1010,8 +1013,14 @@ static int __init pcd_init(void) pcd_probe_capabilities(); if (register_blkdev(major, name)) { - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + + blk_cleanup_queue(cd->disk->queue); + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); + } return -EBUSY; } @@ -1032,6 +1041,9 @@ static void __exit pcd_exit(void) int unit; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + if (cd->present) { del_gendisk(cd->disk); pi_release(cd->pi); From fd9c40f64c514bdc585a21e2e33fa5f83ca8811b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 Apr 2019 10:08:43 -0700 Subject: [PATCH 061/116] block: Revert v5.0 blk_mq_request_issue_directly() changes blk_mq_try_issue_directly() can return BLK_STS*_RESOURCE for requests that have been queued. If that happens when blk_mq_try_issue_directly() is called by the dm-mpath driver then dm-mpath will try to resubmit a request that is already queued and a kernel crash follows. Since it is nontrivial to fix blk_mq_request_issue_directly(), revert the blk_mq_request_issue_directly() changes that went into kernel v5.0. This patch reverts the following commits: * d6a51a97c0b2 ("blk-mq: replace and kill blk_mq_request_issue_directly") # v5.0. * 5b7a6f128aad ("blk-mq: issue directly with bypass 'false' in blk_mq_sched_insert_requests") # v5.0. * 7f556a44e61d ("blk-mq: refactor the code of issue request directly") # v5.0. Cc: Christoph Hellwig Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: James Smart Cc: Dongli Zhang Cc: Laurence Oberman Cc: Reported-by: Laurence Oberman Tested-by: Laurence Oberman Fixes: 7f556a44e61d ("blk-mq: refactor the code of issue request directly") # v5.0. Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +- block/blk-mq-sched.c | 8 +-- block/blk-mq.c | 122 ++++++++++++++++++++++--------------------- block/blk-mq.h | 6 +-- 4 files changed, 71 insertions(+), 69 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4673ebe42255..a55389ba8779 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, */ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { - blk_qc_t unused; - if (blk_cloned_rq_check_limits(q, rq)) return BLK_STS_IOERR; @@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. */ - return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true); + return blk_mq_request_issue_directly(rq, true); } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 40905539afed..aa6bc5c02643 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, * busy in case of 'none' scheduler, and this way may save * us one extra enqueue & dequeue to sw queue. */ - if (!hctx->dispatch_busy && !e && !run_queue_async) + if (!hctx->dispatch_busy && !e && !run_queue_async) { blk_mq_try_issue_list_directly(hctx, list); - else - blk_mq_insert_requests(hctx, ctx, list); + if (list_empty(list)) + return; + } + blk_mq_insert_requests(hctx, ctx, list); } blk_mq_run_hw_queue(hctx, run_queue_async); diff --git a/block/blk-mq.c b/block/blk-mq.c index ac61bcc67a89..a9354835cf51 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1801,74 +1801,76 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, +static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie, - bool bypass, bool last) + bool bypass_insert, bool last) { struct request_queue *q = rq->q; bool run_queue = true; - blk_status_t ret = BLK_STS_RESOURCE; - int srcu_idx; - bool force = false; - hctx_lock(hctx, &srcu_idx); /* - * hctx_lock is needed before checking quiesced flag. + * RCU or SRCU read lock is needed before checking quiesced flag. * - * When queue is stopped or quiesced, ignore 'bypass', insert - * and return BLK_STS_OK to caller, and avoid driver to try to - * dispatch again. + * When queue is stopped or quiesced, ignore 'bypass_insert' from + * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller, + * and avoid driver to try to dispatch again. */ - if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) { + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { run_queue = false; - bypass = false; - goto out_unlock; + bypass_insert = false; + goto insert; } - if (unlikely(q->elevator && !bypass)) - goto out_unlock; + if (q->elevator && !bypass_insert) + goto insert; if (!blk_mq_get_dispatch_budget(hctx)) - goto out_unlock; + goto insert; if (!blk_mq_get_driver_tag(rq)) { blk_mq_put_dispatch_budget(hctx); - goto out_unlock; + goto insert; } - /* - * Always add a request that has been through - *.queue_rq() to the hardware dispatch list. - */ - force = true; - ret = __blk_mq_issue_directly(hctx, rq, cookie, last); -out_unlock: + return __blk_mq_issue_directly(hctx, rq, cookie, last); +insert: + if (bypass_insert) + return BLK_STS_RESOURCE; + + blk_mq_request_bypass_insert(rq, run_queue); + return BLK_STS_OK; +} + +static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, blk_qc_t *cookie) +{ + blk_status_t ret; + int srcu_idx; + + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + + hctx_lock(hctx, &srcu_idx); + + ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + blk_mq_request_bypass_insert(rq, true); + else if (ret != BLK_STS_OK) + blk_mq_end_request(rq, ret); + + hctx_unlock(hctx, srcu_idx); +} + +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) +{ + blk_status_t ret; + int srcu_idx; + blk_qc_t unused_cookie; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + hctx_lock(hctx, &srcu_idx); + ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); hctx_unlock(hctx, srcu_idx); - switch (ret) { - case BLK_STS_OK: - break; - case BLK_STS_DEV_RESOURCE: - case BLK_STS_RESOURCE: - if (force) { - blk_mq_request_bypass_insert(rq, run_queue); - /* - * We have to return BLK_STS_OK for the DM - * to avoid livelock. Otherwise, we return - * the real result to indicate whether the - * request is direct-issued successfully. - */ - ret = bypass ? BLK_STS_OK : ret; - } else if (!bypass) { - blk_mq_sched_insert_request(rq, false, - run_queue, false); - } - break; - default: - if (!bypass) - blk_mq_end_request(rq, ret); - break; - } return ret; } @@ -1876,20 +1878,22 @@ out_unlock: void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { - blk_qc_t unused; - blk_status_t ret = BLK_STS_OK; - while (!list_empty(list)) { + blk_status_t ret; struct request *rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); - if (ret == BLK_STS_OK) - ret = blk_mq_try_issue_directly(hctx, rq, &unused, - false, + ret = blk_mq_request_issue_directly(rq, list_empty(list)); + if (ret != BLK_STS_OK) { + if (ret == BLK_STS_RESOURCE || + ret == BLK_STS_DEV_RESOURCE) { + blk_mq_request_bypass_insert(rq, list_empty(list)); - else - blk_mq_sched_insert_request(rq, false, true, false); + break; + } + blk_mq_end_request(rq, ret); + } } /* @@ -1897,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, * the driver there was more coming, but that turned out to * be a lie. */ - if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs) + if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) hctx->queue->mq_ops->commit_rqs(hctx); } @@ -2012,13 +2016,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) data.hctx = same_queue_rq->mq_hctx; trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, - &cookie, false, true); + &cookie); } } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && !data.hctx->dispatch_busy)) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true); + blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); diff --git a/block/blk-mq.h b/block/blk-mq.h index d704fc7766f4..423ea88ab6fb 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie, - bool bypass, bool last); +/* Used by blk_insert_cloned_request() to issue request directly */ +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last); void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); From ede885ecb2cdf8a8dd5367702e3d964ec846a2d5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 19 Mar 2019 15:19:56 -0700 Subject: [PATCH 062/116] kvm: svm: fix potential get_num_contig_pages overflow get_num_contig_pages() could potentially overflow int so make its type consistent with its usage. Reported-by: Cfir Cohen Cc: stable@vger.kernel.org Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 426039285fd1..947c35a1e545 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6422,11 +6422,11 @@ e_free: return ret; } -static int get_num_contig_pages(int idx, struct page **inpages, - unsigned long npages) +static unsigned long get_num_contig_pages(unsigned long idx, + struct page **inpages, unsigned long npages) { unsigned long paddr, next_paddr; - int i = idx + 1, pages = 1; + unsigned long i = idx + 1, pages = 1; /* find the number of contiguous pages starting from idx */ paddr = __sme_page_pa(inpages[idx]); @@ -6445,12 +6445,12 @@ static int get_num_contig_pages(int idx, struct page **inpages, static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) { - unsigned long vaddr, vaddr_end, next_vaddr, npages, size; + unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_launch_update_data params; struct sev_data_launch_update_data *data; struct page **inpages; - int i, ret, pages; + int ret; if (!sev_guest(kvm)) return -ENOTTY; From b86bc2858b389255cd44555ce4b1e427b2b770c0 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Mar 2019 11:47:31 -0700 Subject: [PATCH 063/116] KVM: SVM: prevent DBG_DECRYPT and DBG_ENCRYPT overflow This ensures that the address and length provided to DBG_DECRYPT and DBG_ENCRYPT do not cause an overflow. At the same time, pass the actual number of pages pinned in memory to sev_unpin_memory() as a cleanup. Reported-by: Cfir Cohen Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 947c35a1e545..e0a791c3d4fc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6799,7 +6799,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; - int ret, size; + unsigned int size; + int ret; if (!sev_guest(kvm)) return -ENOTTY; @@ -6807,6 +6808,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) return -EFAULT; + if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) + return -EINVAL; + if (!debug.dst_uaddr) + return -EINVAL; + vaddr = debug.src_uaddr; size = debug.len; vaddr_end = vaddr + size; @@ -6857,8 +6863,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) dst_vaddr, len, &argp->error); - sev_unpin_memory(kvm, src_p, 1); - sev_unpin_memory(kvm, dst_p, 1); + sev_unpin_memory(kvm, src_p, n); + sev_unpin_memory(kvm, dst_p, n); if (ret) goto err; From acff78477b9b4f26ecdf65733a4ed77fe837e9dc Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Mon, 1 Apr 2019 23:55:59 -0700 Subject: [PATCH 064/116] KVM: x86: nVMX: close leak of L0's x2APIC MSRs (CVE-2019-3887) The nested_vmx_prepare_msr_bitmap() function doesn't directly guard the x2APIC MSR intercepts with the "virtualize x2APIC mode" MSR. As a result, we discovered the potential for a buggy or malicious L1 to get access to L0's x2APIC MSRs, via an L2, as follows. 1. L1 executes WRMSR(IA32_SPEC_CTRL, 1). This causes the spec_ctrl variable, in nested_vmx_prepare_msr_bitmap() to become true. 2. L1 disables "virtualize x2APIC mode" in VMCS12. 3. L1 enables "APIC-register virtualization" in VMCS12. Now, KVM will set VMCS02's x2APIC MSR intercepts from VMCS12, and then set "virtualize x2APIC mode" to 0 in VMCS02. Oops. This patch closes the leak by explicitly guarding VMCS02's x2APIC MSR intercepts with VMCS12's "virtualize x2APIC mode" control. The scenario outlined above and fix prescribed here, were verified with a related patch in kvm-unit-tests titled "Add leak scenario to virt_x2apic_mode_test". Note, it looks like this issue may have been introduced inadvertently during a merge---see 15303ba5d1cd. Signed-off-by: Marc Orr Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 72 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 153e539c29c9..897d70e3d291 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -500,6 +500,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } +static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) { + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + + msr_bitmap[word] = ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. @@ -541,39 +552,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return false; msr_bitmap_l1 = (unsigned long *)kmap(page); - if (nested_cpu_has_apic_reg_virt(vmcs12)) { - /* - * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it - * just lets the processor take the value from the virtual-APIC page; - * take those 256 bits directly from the L1 bitmap. - */ - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap_l0[word] = msr_bitmap_l1[word]; - msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; - } - } else { - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap_l0[word] = ~0; - msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; - } - } - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_TASKPRI), - MSR_TYPE_W); + /* + * To keep the control flow simple, pay eight 8-byte writes (sixteen + * 4-byte writes on 32-bit systems) up front to enable intercepts for + * the x2APIC MSR range and selectively disable them below. + */ + enable_x2apic_msr_intercepts(msr_bitmap_l0); + + if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { + if (nested_cpu_has_apic_reg_virt(vmcs12)) { + /* + * L0 need not intercept reads for MSRs between 0x800 + * and 0x8ff, it just lets the processor take the value + * from the virtual-APIC page; take those 256 bits + * directly from the L1 bitmap. + */ + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + + msr_bitmap_l0[word] = msr_bitmap_l1[word]; + } + } - if (nested_cpu_has_vid(vmcs12)) { nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_EOI), - MSR_TYPE_W); - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_SELF_IPI), + X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_W); + + if (nested_cpu_has_vid(vmcs12)) { + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_EOI), + MSR_TYPE_W); + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_SELF_IPI), + MSR_TYPE_W); + } } if (spec_ctrl) From c73f4c998e1fd4249b9edfa39e23f4fda2b9b041 Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Mon, 1 Apr 2019 23:56:00 -0700 Subject: [PATCH 065/116] KVM: x86: nVMX: fix x2APIC VTPR read intercept Referring to the "VIRTUALIZING MSR-BASED APIC ACCESSES" chapter of the SDM, when "virtualize x2APIC mode" is 1 and "APIC-register virtualization" is 0, a RDMSR of 808H should return the VTPR from the virtual APIC page. However, for nested, KVM currently fails to disable the read intercept for this MSR. This means that a RDMSR exit takes precedence over "virtualize x2APIC mode", and KVM passes through L1's TPR to L2, instead of sourcing the value from L2's virtual APIC page. This patch fixes the issue by disabling the read intercept, in VMCS02, for the VTPR when "APIC-register virtualization" is 0. The issue described above and fix prescribed here, were verified with a related patch in kvm-unit-tests titled "Test VMX's virtualize x2APIC mode w/ nested". Signed-off-by: Marc Orr Reviewed-by: Jim Mattson Fixes: c992384bde84f ("KVM: vmx: speed up MSR bitmap merge") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 897d70e3d291..7ec9bb1dd723 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -578,7 +578,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_TASKPRI), - MSR_TYPE_W); + MSR_TYPE_R | MSR_TYPE_W); if (nested_cpu_has_vid(vmcs12)) { nested_vmx_disable_intercept_for_msr( From 4ed319c6ac08e9a28fca7ac188181ac122f4de84 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 5 Apr 2019 15:26:39 -0400 Subject: [PATCH 066/116] dm integrity: fix deadlock with overlapping I/O dm-integrity will deadlock if overlapping I/O is issued to it, the bug was introduced by commit 724376a04d1a ("dm integrity: implement fair range locks"). Users rarely use overlapping I/O so this bug went undetected until now. Fix this bug by correcting, likely cut-n-paste, typos in ranges_overlap() and also remove a flawed ranges_overlap() check in remove_range_unlocked(). This condition could leave unprocessed bios hanging on wait_list forever. Cc: stable@vger.kernel.org # v4.19+ Fixes: 724376a04d1a ("dm integrity: implement fair range locks") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 94b865c5ce71..7c678f50aaa3 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -913,7 +913,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) { return range1->logical_sector < range2->logical_sector + range2->n_sectors && - range2->logical_sector + range2->n_sectors > range2->logical_sector; + range1->logical_sector + range1->n_sectors > range2->logical_sector; } static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) @@ -959,8 +959,6 @@ static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity struct dm_integrity_range *last_range = list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); struct task_struct *last_range_task; - if (!ranges_overlap(range, last_range)) - break; last_range_task = last_range->task; list_del(&last_range->wait_entry); if (!add_new_range(ic, last_range, false)) { From 5f074f3e192f10c9fade898b9b3b8812e3d83342 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 5 Apr 2019 18:38:45 -0700 Subject: [PATCH 067/116] lib/string.c: implement a basic bcmp A recent optimization in Clang (r355672) lowers comparisons of the return value of memcmp against zero to comparisons of the return value of bcmp against zero. This helps some platforms that implement bcmp more efficiently than memcmp. glibc simply aliases bcmp to memcmp, but an optimized implementation is in the works. This results in linkage failures for all targets with Clang due to the undefined symbol. For now, just implement bcmp as a tailcail to memcmp to unbreak the build. This routine can be further optimized in the future. Other ideas discussed: * A weak alias was discussed, but breaks for architectures that define their own implementations of memcmp since aliases to declarations are not permitted (only definitions). Arch-specific memcmp implementations typically declare memcmp in C headers, but implement them in assembly. * -ffreestanding also is used sporadically throughout the kernel. * -fno-builtin-bcmp doesn't work when doing LTO. Link: https://bugs.llvm.org/show_bug.cgi?id=41035 Link: https://code.woboq.org/userspace/glibc/string/memcmp.c.html#bcmp Link: https://github.com/llvm/llvm-project/commit/8e16d73346f8091461319a7dfc4ddd18eedcff13 Link: https://github.com/ClangBuiltLinux/linux/issues/416 Link: http://lkml.kernel.org/r/20190313211335.165605-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Reported-by: Nathan Chancellor Reported-by: Adhemerval Zanella Suggested-by: Arnd Bergmann Suggested-by: James Y Knight Suggested-by: Masahiro Yamada Suggested-by: Nathan Chancellor Suggested-by: Rasmus Villemoes Acked-by: Steven Rostedt (VMware) Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Masahiro Yamada Reviewed-by: Andy Shevchenko Cc: David Laight Cc: Rasmus Villemoes Cc: Namhyung Kim Cc: Greg Kroah-Hartman Cc: Alexander Shishkin Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 3 +++ lib/string.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index 7927b875f80c..6ab0a6fa512e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -150,6 +150,9 @@ extern void * memscan(void *,int,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCMP extern int memcmp(const void *,const void *,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_BCMP +extern int bcmp(const void *,const void *,__kernel_size_t); +#endif #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif diff --git a/lib/string.c b/lib/string.c index 38e4ca08e757..3ab861c1a857 100644 --- a/lib/string.c +++ b/lib/string.c @@ -866,6 +866,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) EXPORT_SYMBOL(memcmp); #endif +#ifndef __HAVE_ARCH_BCMP +/** + * bcmp - returns 0 if and only if the buffers have identical contents. + * @a: pointer to first buffer. + * @b: pointer to second buffer. + * @len: size of buffers. + * + * The sign or magnitude of a non-zero return value has no particular + * meaning, and architectures may implement their own more efficient bcmp(). So + * while this particular implementation is a simple (tail) call to memcmp, do + * not rely on anything but whether the return value is zero or non-zero. + */ +#undef bcmp +int bcmp(const void *a, const void *b, size_t len) +{ + return memcmp(a, b, len); +} +EXPORT_SYMBOL(bcmp); +#endif + #ifndef __HAVE_ARCH_MEMSCAN /** * memscan - Find a character in an area of memory. From 298a32b132087550d3fa80641ca58323c5dfd4d9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 5 Apr 2019 18:38:49 -0700 Subject: [PATCH 068/116] kmemleak: powerpc: skip scanning holes in the .bss section Commit 2d4f567103ff ("KVM: PPC: Introduce kvm_tmp framework") adds kvm_tmp[] into the .bss section and then free the rest of unused spaces back to the page allocator. kernel_init kvm_guest_init kvm_free_tmp free_reserved_area free_unref_page free_unref_page_prepare With DEBUG_PAGEALLOC=y, it will unmap those pages from kernel. As the result, kmemleak scan will trigger a panic when it scans the .bss section with unmapped pages. This patch creates dedicated kmemleak objects for the .data, .bss and potentially .data..ro_after_init sections to allow partial freeing via the kmemleak_free_part() in the powerpc kvm_free_tmp() function. Link: http://lkml.kernel.org/r/20190321171917.62049-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: Qian Cai Acked-by: Michael Ellerman (powerpc) Tested-by: Qian Cai Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Avi Kivity Cc: Paolo Bonzini Cc: Radim Krcmar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/kvm.c | 7 +++++++ mm/kmemleak.c | 16 +++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 683b5b3805bd..cd381e2291df 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -712,6 +713,12 @@ static void kvm_use_magic_page(void) static __init void kvm_free_tmp(void) { + /* + * Inform kmemleak about the hole in the .bss section since the + * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y. + */ + kmemleak_free_part(&kvm_tmp[kvm_tmp_index], + ARRAY_SIZE(kvm_tmp) - kvm_tmp_index); free_reserved_area(&kvm_tmp[kvm_tmp_index], &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 707fa5579f66..6c318f5ac234 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1529,11 +1529,6 @@ static void kmemleak_scan(void) } rcu_read_unlock(); - /* data/bss scanning */ - scan_large_block(_sdata, _edata); - scan_large_block(__bss_start, __bss_stop); - scan_large_block(__start_ro_after_init, __end_ro_after_init); - #ifdef CONFIG_SMP /* per-cpu sections scanning */ for_each_possible_cpu(i) @@ -2071,6 +2066,17 @@ void __init kmemleak_init(void) } local_irq_restore(flags); + /* register the data/bss sections */ + create_object((unsigned long)_sdata, _edata - _sdata, + KMEMLEAK_GREY, GFP_ATOMIC); + create_object((unsigned long)__bss_start, __bss_stop - __bss_start, + KMEMLEAK_GREY, GFP_ATOMIC); + /* only register .data..ro_after_init if not within .data */ + if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata) + create_object((unsigned long)__start_ro_after_init, + __end_ro_after_init - __start_ro_after_init, + KMEMLEAK_GREY, GFP_ATOMIC); + /* * This is the point where tracking allocations is safe. Automatic * scanning is started during the late initcall. Add the early logged From 6147e136ff5071609b54f18982dea87706288e21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2019 18:38:53 -0700 Subject: [PATCH 069/116] include/linux/bitrev.h: fix constant bitrev clang points out with hundreds of warnings that the bitrev macros have a problem with constant input: drivers/hwmon/sht15.c:187:11: error: variable '__x' is uninitialized when used within its own initialization [-Werror,-Wuninitialized] u8 crc = bitrev8(data->val_status & 0x0F); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/bitrev.h:102:21: note: expanded from macro 'bitrev8' __constant_bitrev8(__x) : \ ~~~~~~~~~~~~~~~~~~~^~~~ include/linux/bitrev.h:67:11: note: expanded from macro '__constant_bitrev8' u8 __x = x; \ ~~~ ^ Both the bitrev and the __constant_bitrev macros use an internal variable named __x, which goes horribly wrong when passing one to the other. The obvious fix is to rename one of the variables, so this adds an extra '_'. It seems we got away with this because - there are only a few drivers using bitrev macros - usually there are no constant arguments to those - when they are constant, they tend to be either 0 or (unsigned)-1 (drivers/isdn/i4l/isdnhdlc.o, drivers/iio/amplifiers/ad8366.c) and give the correct result by pure chance. In fact, the only driver that I could find that gets different results with this is drivers/net/wan/slic_ds26522.c, which in turn is a driver for fairly rare hardware (adding the maintainer to Cc for testing). Link: http://lkml.kernel.org/r/20190322140503.123580-1-arnd@arndb.de Fixes: 556d2f055bf6 ("ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction") Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Cc: Zhao Qiang Cc: Yalin Wang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitrev.h | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index 50fb0dee23e8..d35b8ec1c485 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -34,41 +34,41 @@ static inline u32 __bitrev32(u32 x) #define __constant_bitrev32(x) \ ({ \ - u32 __x = x; \ - __x = (__x >> 16) | (__x << 16); \ - __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = (___x >> 16) | (___x << 16); \ + ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev16(x) \ ({ \ - u16 __x = x; \ - __x = (__x >> 8) | (__x << 8); \ - __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ - __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ - __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ - __x; \ + u16 ___x = x; \ + ___x = (___x >> 8) | (___x << 8); \ + ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \ + ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \ + ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \ + ___x; \ }) #define __constant_bitrev8x4(x) \ ({ \ - u32 __x = x; \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev8(x) \ ({ \ - u8 __x = x; \ - __x = (__x >> 4) | (__x << 4); \ - __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ - __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ - __x; \ + u8 ___x = x; \ + ___x = (___x >> 4) | (___x << 4); \ + ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \ + ___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1); \ + ___x; \ }) #define bitrev32(x) \ From b11ed18efa8f3dc58b259b812588317b765b1cfc Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 5 Apr 2019 18:38:58 -0700 Subject: [PATCH 070/116] lib/lzo: fix bugs for very short or empty input For very short input data (0 - 1 bytes), lzo-rle was not behaving correctly. Fix this behaviour and update documentation accordingly. For zero-length input, lzo v0 outputs an end-of-stream marker only, which was misinterpreted by lzo-rle as a bitstream version number. Ensure bitstream versions > 0 require a minimum stream length of 5. Also fixes a bug in handling the tail for very short inputs when a bitstream version is present. Link: http://lkml.kernel.org/r/20190326165857.34613-1-dave.rodgman@arm.com Signed-off-by: Dave Rodgman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lzo.txt | 8 +++++--- lib/lzo/lzo1x_compress.c | 9 ++++++--- lib/lzo/lzo1x_decompress_safe.c | 4 +--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index f79934225d8d..ca983328976b 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -102,9 +102,11 @@ Byte sequences dictionary which is empty, and that it will always be invalid at this place. - 17 : bitstream version. If the first byte is 17, the next byte - gives the bitstream version (version 1 only). If the first byte - is not 17, the bitstream version is 0. + 17 : bitstream version. If the first byte is 17, and compressed + stream length is at least 5 bytes (length of shortest possible + versioned bitstream), the next byte gives the bitstream version + (version 1 only). + Otherwise, the bitstream version is 0. 18..21 : copy 0..3 literals state = (byte - 17) = 0..3 [ copy literals ] diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 4525fb094844..a8ede77afe0d 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -291,13 +291,14 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, { const unsigned char *ip = in; unsigned char *op = out; + unsigned char *data_start; size_t l = in_len; size_t t = 0; signed char state_offset = -2; unsigned int m4_max_offset; - // LZO v0 will never write 17 as first byte, - // so this is used to version the bitstream + // LZO v0 will never write 17 as first byte (except for zero-length + // input), so this is used to version the bitstream if (bitstream_version > 0) { *op++ = 17; *op++ = bitstream_version; @@ -306,6 +307,8 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, m4_max_offset = M4_MAX_OFFSET_V0; } + data_start = op; + while (l > 20) { size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; @@ -324,7 +327,7 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, if (t > 0) { const unsigned char *ii = in + in_len - t; - if (op == out && t <= 238) { + if (op == data_start && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { op[state_offset] |= t; diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 6d2600ea3b55..9e07e9ef1aad 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -54,11 +54,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, if (unlikely(in_len < 3)) goto input_overrun; - if (likely(*ip == 17)) { + if (likely(in_len >= 5) && likely(*ip == 17)) { bitstream_version = ip[1]; ip += 2; - if (unlikely(in_len < 5)) - goto input_overrun; } else { bitstream_version = 0; } From fcae96ff96538f66e7acd5d4e0f2e7516ff8cbd0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Apr 2019 18:39:01 -0700 Subject: [PATCH 071/116] mm: fix vm_fault_t cast in VM_FAULT_GET_HINDEX() Symmetrically to VM_FAULT_SET_HINDEX(), we need a force-cast in VM_FAULT_GET_HINDEX() to tell sparse that this is intentional. Sparse complains about the current code when building a kernel with CONFIG_MEMORY_FAILURE: arch/x86/mm/fault.c:1058:53: warning: restricted vm_fault_t degrades to integer Link: http://lkml.kernel.org/r/20190327204117.35215-1-jannh@google.com Fixes: 3d3539018d2c ("mm: create the new vm_fault_t type") Signed-off-by: Jann Horn Reviewed-by: Andrew Morton Cc: Souptick Joarder Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7eade9132f02..4ef4bbe78a1d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -671,7 +671,7 @@ enum vm_fault_reason { /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) -#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf) +#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ From 58b6e5e8f1addd44583d61b0a03c0f5519527e35 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Apr 2019 18:39:06 -0700 Subject: [PATCH 072/116] hugetlbfs: fix memory leak for resv_map When mknod is used to create a block special file in hugetlbfs, it will allocate an inode and kmalloc a 'struct resv_map' via resv_map_alloc(). inode->i_mapping->private_data will point the newly allocated resv_map. However, when the device special file is opened bd_acquire() will set inode->i_mapping to bd_inode->i_mapping. Thus the pointer to the allocated resv_map is lost and the structure is leaked. Programs to reproduce: mount -t hugetlbfs nodev hugetlbfs mknod hugetlbfs/dev b 0 0 exec 30<> hugetlbfs/dev umount hugetlbfs/ resv_map structures are only needed for inodes which can have associated page allocations. To fix the leak, only allocate resv_map for those inodes which could possibly be associated with page allocations. Link: http://lkml.kernel.org/r/20190401213101.16476-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Andrew Morton Reported-by: Yufen Yu Suggested-by: Yufen Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ec32fece5e1e..9285dd4f4b1c 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -755,11 +755,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev) { struct inode *inode; - struct resv_map *resv_map; + struct resv_map *resv_map = NULL; - resv_map = resv_map_alloc(); - if (!resv_map) - return NULL; + /* + * Reserve maps are only needed for inodes that can have associated + * page allocations. + */ + if (S_ISREG(mode) || S_ISLNK(mode)) { + resv_map = resv_map_alloc(); + if (!resv_map) + return NULL; + } inode = new_inode(sb); if (inode) { @@ -794,8 +800,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, break; } lockdep_annotate_inode_mutex_key(inode); - } else - kref_put(&resv_map->refs, resv_map_release); + } else { + if (resv_map) + kref_put(&resv_map->refs, resv_map_release); + } return inode; } From c6f3c5ee40c10bb65725047a220570f718507001 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 5 Apr 2019 18:39:10 -0700 Subject: [PATCH 073/116] mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd() With some architectures like ppc64, set_pmd_at() cannot cope with a situation where there is already some (different) valid entry present. Use pmdp_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PMD entries. This is similar to commit cae85cb8add3 ("mm/memory.c: fix modifying of page protection by insert_pfn()") We also do similar update w.r.t insert_pfn_pud eventhough ppc64 don't support pud pfn entries now. Without this patch we also see the below message in kernel log "BUG: non-zero pgtables_bytes on freeing mm:" Link: http://lkml.kernel.org/r/20190402115125.18803-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reported-by: Chandan Rajendra Reviewed-by: Jan Kara Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 404acdcd0455..165ea46bf149 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pmd_lock(mm, pmd); + if (!pmd_none(*pmd)) { + if (write) { + if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); + goto out_unlock; + } + entry = pmd_mkyoung(*pmd); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + if (pmdp_set_access_flags(vma, addr, pmd, entry, 1)) + update_mmu_cache_pmd(vma, addr, pmd); + } + + goto out_unlock; + } + entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); @@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pgtable) { pgtable_trans_huge_deposit(mm, pmd, pgtable); mm_inc_nr_ptes(mm); + pgtable = NULL; } set_pmd_at(mm, addr, pmd, entry); update_mmu_cache_pmd(vma, addr, pmd); + +out_unlock: spin_unlock(ptl); + if (pgtable) + pte_free(mm, pgtable); } vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pud_lock(mm, pud); + if (!pud_none(*pud)) { + if (write) { + if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pud(*pud)); + goto out_unlock; + } + entry = pud_mkyoung(*pud); + entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); + if (pudp_set_access_flags(vma, addr, pud, entry, 1)) + update_mmu_cache_pud(vma, addr, pud); + } + goto out_unlock; + } + entry = pud_mkhuge(pfn_t_pud(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); @@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); + +out_unlock: spin_unlock(ptl); } From be87ab0afd680ac35486d16c0963c56d9be1d8a0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 5 Apr 2019 18:39:14 -0700 Subject: [PATCH 074/116] psi: clarify the units used in pressure files The output of the PSI files show a bunch of numbers with no unit. The psi.txt documentation file also does not indicate what units are used. One can only find out by looking at the source code. The units are percentage for the averages and useconds for the total. Make the information easier to find by documenting the units in psi.txt. Link: http://lkml.kernel.org/r/20190402193810.3450-1-longman@redhat.com Signed-off-by: Waiman Long Acked-by: Johannes Weiner Cc: "Peter Zijlstra (Intel)" Cc: Tejun Heo Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/psi.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt index b8ca28b60215..7e71c9c1d8e9 100644 --- a/Documentation/accounting/psi.txt +++ b/Documentation/accounting/psi.txt @@ -56,12 +56,12 @@ situation from a state where some tasks are stalled but the CPU is still doing productive work. As such, time spent in this subset of the stall state is tracked separately and exported in the "full" averages. -The ratios are tracked as recent trends over ten, sixty, and three -hundred second windows, which gives insight into short term events as -well as medium and long term trends. The total absolute stall time is -tracked and exported as well, to allow detection of latency spikes -which wouldn't necessarily make a dent in the time averages, or to -average trends over custom time frames. +The ratios (in %) are tracked as recent trends over ten, sixty, and +three hundred second windows, which gives insight into short term events +as well as medium and long term trends. The total absolute stall time +(in us) is tracked and exported as well, to allow detection of latency +spikes which wouldn't necessarily make a dent in the time averages, +or to average trends over custom time frames. Cgroup2 interface ================= From 0b3d6e6f2dd0a7b697b1aa8c167265908940624b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 5 Apr 2019 18:39:18 -0700 Subject: [PATCH 075/116] mm: writeback: use exact memcg dirty counts Since commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") memcg dirty and writeback counters are managed as: 1) per-memcg per-cpu values in range of [-32..32] 2) per-memcg atomic counter When a per-cpu counter cannot fit in [-32..32] it's flushed to the atomic. Stat readers only check the atomic. Thus readers such as balance_dirty_pages() may see a nontrivial error margin: 32 pages per cpu. Assuming 100 cpus: 4k x86 page_size: 13 MiB error per memcg 64k ppc page_size: 200 MiB error per memcg Considering that dirty+writeback are used together for some decisions the errors double. This inaccuracy can lead to undeserved oom kills. One nasty case is when all per-cpu counters hold positive values offsetting an atomic negative value (i.e. per_cpu[*]=32, atomic=n_cpu*-32). balance_dirty_pages() only consults the atomic and does not consider throttling the next n_cpu*32 dirty pages. If the file_lru is in the 13..200 MiB range then there's absolutely no dirty throttling, which burdens vmscan with only dirty+writeback pages thus resorting to oom kill. It could be argued that tiny containers are not supported, but it's more subtle. It's the amount the space available for file lru that matters. If a container has memory.max-200MiB of non reclaimable memory, then it will also suffer such oom kills on a 100 cpu machine. The following test reliably ooms without this patch. This patch avoids oom kills. $ cat test mount -t cgroup2 none /dev/cgroup cd /dev/cgroup echo +io +memory > cgroup.subtree_control mkdir test cd test echo 10M > memory.max (echo $BASHPID > cgroup.procs && exec /memcg-writeback-stress /foo) (echo $BASHPID > cgroup.procs && exec dd if=/dev/zero of=/foo bs=2M count=100) $ cat memcg-writeback-stress.c /* * Dirty pages from all but one cpu. * Clean pages from the non dirtying cpu. * This is to stress per cpu counter imbalance. * On a 100 cpu machine: * - per memcg per cpu dirty count is 32 pages for each of 99 cpus * - per memcg atomic is -99*32 pages * - thus the complete dirty limit: sum of all counters 0 * - balance_dirty_pages() only sees atomic count -99*32 pages, which * it max()s to 0. * - So a workload can dirty -99*32 pages before balance_dirty_pages() * cares. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include static char *buf; static int bufSize; static void set_affinity(int cpu) { cpu_set_t affinity; CPU_ZERO(&affinity); CPU_SET(cpu, &affinity); if (sched_setaffinity(0, sizeof(affinity), &affinity)) err(1, "sched_setaffinity"); } static void dirty_on(int output_fd, int cpu) { int i, wrote; set_affinity(cpu); for (i = 0; i < 32; i++) { for (wrote = 0; wrote < bufSize; ) { int ret = write(output_fd, buf+wrote, bufSize-wrote); if (ret == -1) err(1, "write"); wrote += ret; } } } int main(int argc, char **argv) { int cpu, flush_cpu = 1, output_fd; const char *output; if (argc != 2) errx(1, "usage: output_file"); output = argv[1]; bufSize = getpagesize(); buf = malloc(getpagesize()); if (buf == NULL) errx(1, "malloc failed"); output_fd = open(output, O_CREAT|O_RDWR); if (output_fd == -1) err(1, "open(%s)", output); for (cpu = 0; cpu < get_nprocs(); cpu++) { if (cpu != flush_cpu) dirty_on(output_fd, cpu); } set_affinity(flush_cpu); if (fsync(output_fd)) err(1, "fsync(%s)", output); if (close(output_fd)) err(1, "close(%s)", output); free(buf); } Make balance_dirty_pages() and wb_over_bg_thresh() work harder to collect exact per memcg counters. This avoids the aforementioned oom kills. This does not affect the overhead of memory.stat, which still reads the single atomic counter. Why not use percpu_counter? memcg already handles cpus going offline, so no need for that overhead from percpu_counter. And the percpu_counter spinlocks are more heavyweight than is required. It probably also makes sense to use exact dirty and writeback counters in memcg oom reports. But that is saved for later. Link: http://lkml.kernel.org/r/20190329174609.164344-1-gthelen@google.com Signed-off-by: Greg Thelen Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Tejun Heo Cc: [4.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++++- mm/memcontrol.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f3d880b7ca1..dbb6118370c1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -566,7 +566,10 @@ struct mem_cgroup *lock_page_memcg(struct page *page); void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); -/* idx can be of type enum memcg_stat_item or node_stat_item */ +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page_state(). + */ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 532e0e2a4817..81a0d3914ec9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) return &memcg->cgwb_domain; } +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page(). + */ +static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx) +{ + long x = atomic_long_read(&memcg->stat[idx]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx]; + if (x < 0) + x = 0; + return x; +} + /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question @@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); + *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ - *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); + *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK); *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | (1 << LRU_ACTIVE_FILE)); *pheadroom = PAGE_COUNTER_MAX; From 166dbd930c99f640fa8a9beead7b9f5f5b016fa0 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Fri, 5 Apr 2019 18:39:22 -0700 Subject: [PATCH 076/116] MAINTAINERS: fix bad pattern in ARM/NUVOTON NPCM In the process of upstreaming architecture support for ARM/NUVOTON NPCM include/dt-bindings/clock/nuvoton,npcm7xx-clks.h was renamed include/dt-bindings/clock/nuvoton,npcm7xx-clock.h without updating MAINTAINERS. This updates the MAINTAINERS pattern to match the new name of this file. Link: http://lkml.kernel.org/r/20190328235752.334462-1-tmaimon77@gmail.com Fixes: 6a498e06ba22 ("MAINTAINERS: Add entry for the Nuvoton NPCM architecture") Signed-off-by: Brendan Higgins Signed-off-by: Tomer Maimon Reported-by: Joe Perches Reviewed-by: Benjamin Fair Cc: Avi Fishman Cc: Mukesh Ojha Cc: Nancy Yuen Cc: Patrick Venture Cc: Tali Perry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6771bd784f5f..63bfdaf13f28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1900,7 +1900,7 @@ L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-npcm/ F: arch/arm/boot/dts/nuvoton-npcm* -F: include/dt-bindings/clock/nuvoton,npcm7xx-clks.h +F: include/dt-bindings/clock/nuvoton,npcm7xx-clock.h F: drivers/*/*npcm* F: Documentation/devicetree/bindings/*/*npcm* F: Documentation/devicetree/bindings/*/*/*npcm* From 803cfadcb6c5518ebb5a9a398d56b9418ad65585 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Fri, 5 Apr 2019 18:39:26 -0700 Subject: [PATCH 077/116] MAINTAINERS: add maintainer and replacing reviewer ARM/NUVOTON NPCM Add Tali Perry as Nuvoton NPCM maintainer, replace Brendan Higgins Nuvoton NPCM reviewer with Benjamin Fair. Link: http://lkml.kernel.org/r/20190328235752.334462-2-tmaimon77@gmail.com Signed-off-by: Tomer Maimon Reviewed-by: Brendan Higgins Reviewed-by: Benjamin Fair Reviewed-by: Mukesh Ojha Cc: Joe Perches Cc: Avi Fishman Cc: Patrick Venture Cc: Nancy Yuen Cc: Tali Perry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 63bfdaf13f28..2359e12e4c41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1893,9 +1893,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git ARM/NUVOTON NPCM ARCHITECTURE M: Avi Fishman M: Tomer Maimon +M: Tali Perry R: Patrick Venture R: Nancy Yuen -R: Brendan Higgins +R: Benjamin Fair L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-npcm/ From acaf892ecbf5be7710ae05a61fd43c668f68ad95 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Apr 2019 18:39:30 -0700 Subject: [PATCH 078/116] sh: fix multiple function definition build errors Many of the sh CPU-types have their own plat_irq_setup() and arch_init_clk_ops() functions, so these same (empty) functions in arch/sh/boards/of-generic.c are not needed and cause build errors. If there is some case where these empty functions are needed, they can be retained by marking them as "__weak" while at the same time making builds that do not need them succeed. Fixes these build errors: arch/sh/boards/of-generic.o: In function `plat_irq_setup': (.init.text+0x134): multiple definition of `plat_irq_setup' arch/sh/kernel/cpu/sh2/setup-sh7619.o:(.init.text+0x30): first defined here arch/sh/boards/of-generic.o: In function `arch_init_clk_ops': (.init.text+0x118): multiple definition of `arch_init_clk_ops' arch/sh/kernel/cpu/sh2/clock-sh7619.o:(.init.text+0x0): first defined here Link: http://lkml.kernel.org/r/9ee4e0c5-f100-86a2-bd4d-1d3287ceab31@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Takashi Iwai Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/boards/of-generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 958f46da3a79..d91065e81a4e 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = { struct sh_clk_ops; -void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx) +void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx) { } -void __init plat_irq_setup(void) +void __init __weak plat_irq_setup(void) { } From e91455217d8c7b128c158432869f6e697283f3ec Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 5 Apr 2019 18:39:34 -0700 Subject: [PATCH 079/116] mm/util.c: fix strndup_user() comment The kerneldoc misdescribes strndup_user()'s return value. Cc: Dan Carpenter Cc: Timur Tabi Cc: Mihai Caraman Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index d559bde497a9..43a2984bccaa 100644 --- a/mm/util.c +++ b/mm/util.c @@ -204,7 +204,7 @@ EXPORT_SYMBOL(vmemdup_user); * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. * - * Return: newly allocated copy of @s or %NULL in case of error + * Return: newly allocated copy of @s or an ERR_PTR() in case of error */ char *strndup_user(const char __user *s, long n) { From 9002b21465fa4d829edfc94a5a441005cffaa972 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Apr 2019 18:39:38 -0700 Subject: [PATCH 080/116] kernel/sysctl.c: fix out-of-bounds access when setting file-max Commit 32a5ad9c2285 ("sysctl: handle overflow for file-max") hooked up min/max values for the file-max sysctl parameter via the .extra1 and .extra2 fields in the corresponding struct ctl_table entry. Unfortunately, the minimum value points at the global 'zero' variable, which is an int. This results in a KASAN splat when accessed as a long by proc_doulongvec_minmax on 64-bit architectures: | BUG: KASAN: global-out-of-bounds in __do_proc_doulongvec_minmax+0x5d8/0x6a0 | Read of size 8 at addr ffff2000133d1c20 by task systemd/1 | | CPU: 0 PID: 1 Comm: systemd Not tainted 5.1.0-rc3-00012-g40b114779944 #2 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x228 | show_stack+0x14/0x20 | dump_stack+0xe8/0x124 | print_address_description+0x60/0x258 | kasan_report+0x140/0x1a0 | __asan_report_load8_noabort+0x18/0x20 | __do_proc_doulongvec_minmax+0x5d8/0x6a0 | proc_doulongvec_minmax+0x4c/0x78 | proc_sys_call_handler.isra.19+0x144/0x1d8 | proc_sys_write+0x34/0x58 | __vfs_write+0x54/0xe8 | vfs_write+0x124/0x3c0 | ksys_write+0xbc/0x168 | __arm64_sys_write+0x68/0x98 | el0_svc_common+0x100/0x258 | el0_svc_handler+0x48/0xc0 | el0_svc+0x8/0xc | | The buggy address belongs to the variable: | zero+0x0/0x40 | | Memory state around the buggy address: | ffff2000133d1b00: 00 00 00 00 00 00 00 00 fa fa fa fa 04 fa fa fa | ffff2000133d1b80: fa fa fa fa 04 fa fa fa fa fa fa fa 04 fa fa fa | >ffff2000133d1c00: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00 | ^ | ffff2000133d1c80: fa fa fa fa 00 fa fa fa fa fa fa fa 00 00 00 00 | ffff2000133d1d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fix the splat by introducing a unsigned long 'zero_ul' and using that instead. Link: http://lkml.kernel.org/r/20190403153409.17307-1-will.deacon@arm.com Fixes: 32a5ad9c2285 ("sysctl: handle overflow for file-max") Signed-off-by: Will Deacon Acked-by: Christian Brauner Cc: Kees Cook Cc: Alexey Dobriyan Cc: Matteo Croce Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e5da394d1ca3..c9ec050bcf46 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -128,6 +128,7 @@ static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; +static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; @@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &zero_ul, .extra2 = &long_max, }, { From 3ace6891ce8bb9e1267358cb58f93b4fd8b72b69 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 1 Apr 2019 13:14:37 +0300 Subject: [PATCH 081/116] i2c: imx: don't leak the i2c adapter on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to free the i2c adapter on the error exit path. Signed-off-by: Laurentiu Tudor Reviewed-by: Mukesh Ojha Reviewed-by: Uwe Kleine-König Fixes: e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()") Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 42fed40198a0..c0c3043b5d61 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1169,11 +1169,13 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Init DMA config if supported */ ret = i2c_imx_dma_request(i2c_imx, phy_addr); if (ret < 0) - goto clk_notifier_unregister; + goto del_adapter; dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n"); return 0; /* Return OK */ +del_adapter: + i2c_del_adapter(&i2c_imx->adapter); clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); rpm_disable: From 7ff684a683d777c4956fce93e60accbab2bd7696 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 5 Apr 2019 17:42:45 -0400 Subject: [PATCH 082/116] null_blk: prevent crash from bad home_node value At module load, if the selected home_node value is greater than the available numa nodes, the system will crash in __alloc_pages_nodemask() due to a bad paging request. Prevent this user error crash by detecting the bad value, logging an error, and setting g_home_node back to the default of NUMA_NO_NODE. Signed-off-by: John Pittman Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 417a9f15c116..d7ac09c092f2 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1748,6 +1748,11 @@ static int __init null_init(void) return -EINVAL; } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { + pr_err("null_blk: invalid home_node value\n"); + g_home_node = NUMA_NO_NODE; + } + if (g_queue_mode == NULL_Q_RQ) { pr_err("null_blk: legacy IO path no longer available\n"); return -EINVAL; From 47b16820c490149c2923e8474048f2c6e7557cab Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 19 Feb 2019 08:49:56 -0800 Subject: [PATCH 083/116] xsysace: Fix error handling in ace_setup If xace hardware reports a bad version number, the error handling code in ace_setup() calls put_disk(), followed by queue cleanup. However, since the disk data structure has the queue pointer set, put_disk() also cleans and releases the queue. This results in blk_cleanup_queue() accessing an already released data structure, which in turn may result in a crash such as the following. [ 10.681671] BUG: Kernel NULL pointer dereference at 0x00000040 [ 10.681826] Faulting instruction address: 0xc0431480 [ 10.682072] Oops: Kernel access of bad area, sig: 11 [#1] [ 10.682251] BE PAGE_SIZE=4K PREEMPT Xilinx Virtex440 [ 10.682387] Modules linked in: [ 10.682528] CPU: 0 PID: 1 Comm: swapper Tainted: G W 5.0.0-rc6-next-20190218+ #2 [ 10.682733] NIP: c0431480 LR: c043147c CTR: c0422ad8 [ 10.682863] REGS: cf82fbe0 TRAP: 0300 Tainted: G W (5.0.0-rc6-next-20190218+) [ 10.683065] MSR: 00029000 CR: 22000222 XER: 00000000 [ 10.683236] DEAR: 00000040 ESR: 00000000 [ 10.683236] GPR00: c043147c cf82fc90 cf82ccc0 00000000 00000000 00000000 00000002 00000000 [ 10.683236] GPR08: 00000000 00000000 c04310bc 00000000 22000222 00000000 c0002c54 00000000 [ 10.683236] GPR16: 00000000 00000001 c09aa39c c09021b0 c09021dc 00000007 c0a68c08 00000000 [ 10.683236] GPR24: 00000001 ced6d400 ced6dcf0 c0815d9c 00000000 00000000 00000000 cedf0800 [ 10.684331] NIP [c0431480] blk_mq_run_hw_queue+0x28/0x114 [ 10.684473] LR [c043147c] blk_mq_run_hw_queue+0x24/0x114 [ 10.684602] Call Trace: [ 10.684671] [cf82fc90] [c043147c] blk_mq_run_hw_queue+0x24/0x114 (unreliable) [ 10.684854] [cf82fcc0] [c04315bc] blk_mq_run_hw_queues+0x50/0x7c [ 10.685002] [cf82fce0] [c0422b24] blk_set_queue_dying+0x30/0x68 [ 10.685154] [cf82fcf0] [c0423ec0] blk_cleanup_queue+0x34/0x14c [ 10.685306] [cf82fd10] [c054d73c] ace_probe+0x3dc/0x508 [ 10.685445] [cf82fd50] [c052d740] platform_drv_probe+0x4c/0xb8 [ 10.685592] [cf82fd70] [c052abb0] really_probe+0x20c/0x32c [ 10.685728] [cf82fda0] [c052ae58] driver_probe_device+0x68/0x464 [ 10.685877] [cf82fdc0] [c052b500] device_driver_attach+0xb4/0xe4 [ 10.686024] [cf82fde0] [c052b5dc] __driver_attach+0xac/0xfc [ 10.686161] [cf82fe00] [c0528428] bus_for_each_dev+0x80/0xc0 [ 10.686314] [cf82fe30] [c0529b3c] bus_add_driver+0x144/0x234 [ 10.686457] [cf82fe50] [c052c46c] driver_register+0x88/0x15c [ 10.686610] [cf82fe60] [c09de288] ace_init+0x4c/0xac [ 10.686742] [cf82fe80] [c0002730] do_one_initcall+0xac/0x330 [ 10.686888] [cf82fee0] [c09aafd0] kernel_init_freeable+0x34c/0x478 [ 10.687043] [cf82ff30] [c0002c6c] kernel_init+0x18/0x114 [ 10.687188] [cf82ff40] [c000f2f0] ret_from_kernel_thread+0x14/0x1c [ 10.687349] Instruction dump: [ 10.687435] 3863ffd4 4bfffd70 9421ffd0 7c0802a6 93c10028 7c9e2378 93e1002c 38810008 [ 10.687637] 7c7f1b78 90010034 4bfffc25 813f008c <81290040> 75290100 4182002c 80810008 [ 10.688056] ---[ end trace 13c9ff51d41b9d40 ]--- Fix the problem by setting the disk queue pointer to NULL before calling put_disk(). A more comprehensive fix might be to rearrange the code to check the hardware version before initializing data structures, but I don't know if this would have undesirable side effects, and it would increase the complexity of backporting the fix to older kernels. Fixes: 74489a91dd43a ("Add support for Xilinx SystemACE CompactFlash interface") Acked-by: Michal Simek Signed-off-by: Guenter Roeck Signed-off-by: Jens Axboe --- drivers/block/xsysace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 87ccef4bd69e..32a21b8d1d85 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1090,6 +1090,8 @@ static int ace_setup(struct ace_device *ace) return 0; err_read: + /* prevent double queue cleanup */ + ace->gd->queue = NULL; put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); From 10dce8af34226d90fa56746a934f8da5dcdba3df Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 26 Mar 2019 22:20:43 +0000 Subject: [PATCH 084/116] fs: stream_open - opener for stream-like files so that read and write can run simultaneously without deadlock Commit 9c225f2655e3 ("vfs: atomic f_pos accesses as per POSIX") added locking for file.f_pos access and in particular made concurrent read and write not possible - now both those functions take f_pos lock for the whole run, and so if e.g. a read is blocked waiting for data, write will deadlock waiting for that read to complete. This caused regression for stream-like files where previously read and write could run simultaneously, but after that patch could not do so anymore. See e.g. commit 581d21a2d02a ("xenbus: fix deadlock on writes to /proc/xen/xenbus") which fixes such regression for particular case of /proc/xen/xenbus. The patch that added f_pos lock in 2014 did so to guarantee POSIX thread safety for read/write/lseek and added the locking to file descriptors of all regular files. In 2014 that thread-safety problem was not new as it was already discussed earlier in 2006. However even though 2006'th version of Linus's patch was adding f_pos locking "only for files that are marked seekable with FMODE_LSEEK (thus avoiding the stream-like objects like pipes and sockets)", the 2014 version - the one that actually made it into the tree as 9c225f2655e3 - is doing so irregardless of whether a file is seekable or not. See https://lore.kernel.org/lkml/53022DB1.4070805@gmail.com/ https://lwn.net/Articles/180387 https://lwn.net/Articles/180396 for historic context. The reason that it did so is, probably, that there are many files that are marked non-seekable, but e.g. their read implementation actually depends on knowing current position to correctly handle the read. Some examples: kernel/power/user.c snapshot_read fs/debugfs/file.c u32_array_read fs/fuse/control.c fuse_conn_waiting_read + ... drivers/hwmon/asus_atk0110.c atk_debugfs_ggrp_read arch/s390/hypfs/inode.c hypfs_read_iter ... Despite that, many nonseekable_open users implement read and write with pure stream semantics - they don't depend on passed ppos at all. And for those cases where read could wait for something inside, it creates a situation similar to xenbus - the write could be never made to go until read is done, and read is waiting for some, potentially external, event, for potentially unbounded time -> deadlock. Besides xenbus, there are 14 such places in the kernel that I've found with semantic patch (see below): drivers/xen/evtchn.c:667:8-24: ERROR: evtchn_fops: .read() can deadlock .write() drivers/isdn/capi/capi.c:963:8-24: ERROR: capi_fops: .read() can deadlock .write() drivers/input/evdev.c:527:1-17: ERROR: evdev_fops: .read() can deadlock .write() drivers/char/pcmcia/cm4000_cs.c:1685:7-23: ERROR: cm4000_fops: .read() can deadlock .write() net/rfkill/core.c:1146:8-24: ERROR: rfkill_fops: .read() can deadlock .write() drivers/s390/char/fs3270.c:488:1-17: ERROR: fs3270_fops: .read() can deadlock .write() drivers/usb/misc/ldusb.c:310:1-17: ERROR: ld_usb_fops: .read() can deadlock .write() drivers/hid/uhid.c:635:1-17: ERROR: uhid_fops: .read() can deadlock .write() net/batman-adv/icmp_socket.c:80:1-17: ERROR: batadv_fops: .read() can deadlock .write() drivers/media/rc/lirc_dev.c:198:1-17: ERROR: lirc_fops: .read() can deadlock .write() drivers/leds/uleds.c:77:1-17: ERROR: uleds_fops: .read() can deadlock .write() drivers/input/misc/uinput.c:400:1-17: ERROR: uinput_fops: .read() can deadlock .write() drivers/infiniband/core/user_mad.c:985:7-23: ERROR: umad_fops: .read() can deadlock .write() drivers/gnss/core.c:45:1-17: ERROR: gnss_fops: .read() can deadlock .write() In addition to the cases above another regression caused by f_pos locking is that now FUSE filesystems that implement open with FOPEN_NONSEEKABLE flag, can no longer implement bidirectional stream-like files - for the same reason as above e.g. read can deadlock write locking on file.f_pos in the kernel. FUSE's FOPEN_NONSEEKABLE was added in 2008 in a7c1b990f715 ("fuse: implement nonseekable open") to support OSSPD. OSSPD implements /dev/dsp in userspace with FOPEN_NONSEEKABLE flag, with corresponding read and write routines not depending on current position at all, and with both read and write being potentially blocking operations: See https://github.com/libfuse/osspd https://lwn.net/Articles/308445 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1406 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1438-L1477 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1479-L1510 Corresponding libfuse example/test also describes FOPEN_NONSEEKABLE as "somewhat pipe-like files ..." with read handler not using offset. However that test implements only read without write and cannot exercise the deadlock scenario: https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L124-L131 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L146-L163 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L209-L216 I've actually hit the read vs write deadlock for real while implementing my FUSE filesystem where there is /head/watch file, for which open creates separate bidirectional socket-like stream in between filesystem and its user with both read and write being later performed simultaneously. And there it is semantically not easy to split the stream into two separate read-only and write-only channels: https://lab.nexedi.com/kirr/wendelin.core/blob/f13aa600/wcfs/wcfs.go#L88-169 Let's fix this regression. The plan is: 1. We can't change nonseekable_open to include &~FMODE_ATOMIC_POS - doing so would break many in-kernel nonseekable_open users which actually use ppos in read/write handlers. 2. Add stream_open() to kernel to open stream-like non-seekable file descriptors. Read and write on such file descriptors would never use nor change ppos. And with that property on stream-like files read and write will be running without taking f_pos lock - i.e. read and write could be running simultaneously. 3. With semantic patch search and convert to stream_open all in-kernel nonseekable_open users for which read and write actually do not depend on ppos and where there is no other methods in file_operations which assume @offset access. 4. Add FOPEN_STREAM to fs/fuse/ and open in-kernel file-descriptors via steam_open if that bit is present in filesystem open reply. It was tempting to change fs/fuse/ open handler to use stream_open instead of nonseekable_open on just FOPEN_NONSEEKABLE flags, but grepping through Debian codesearch shows users of FOPEN_NONSEEKABLE, and in particular GVFS which actually uses offset in its read and write handlers https://codesearch.debian.net/search?q=-%3Enonseekable+%3D https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1080 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1247-1346 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1399-1481 so if we would do such a change it will break a real user. 5. Add stream_open and FOPEN_STREAM handling to stable kernels starting from v3.14+ (the kernel where 9c225f2655 first appeared). This will allow to patch OSSPD and other FUSE filesystems that provide stream-like files to return FOPEN_STREAM | FOPEN_NONSEEKABLE in their open handler and this way avoid the deadlock on all kernel versions. This should work because fs/fuse/ ignores unknown open flags returned from a filesystem and so passing FOPEN_STREAM to a kernel that is not aware of this flag cannot hurt. In turn the kernel that is not aware of FOPEN_STREAM will be < v3.14 where just FOPEN_NONSEEKABLE is sufficient to implement streams without read vs write deadlock. This patch adds stream_open, converts /proc/xen/xenbus to it and adds semantic patch to automatically locate in-kernel places that are either required to be converted due to read vs write deadlock, or that are just safe to be converted because read and write do not use ppos and there are no other funky methods in file_operations. Regarding semantic patch I've verified each generated change manually - that it is correct to convert - and each other nonseekable_open instance left - that it is either not correct to convert there, or that it is not converted due to current stream_open.cocci limitations. The script also does not convert files that should be valid to convert, but that currently have .llseek = noop_llseek or generic_file_llseek for unknown reason despite file being opened with nonseekable_open (e.g. drivers/input/mousedev.c) Cc: Michael Kerrisk Cc: Yongzhi Pan Cc: Jonathan Corbet Cc: David Vrabel Cc: Juergen Gross Cc: Miklos Szeredi Cc: Tejun Heo Cc: Kirill Tkhai Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Nikolaus Rath Cc: Han-Wen Nienhuys Signed-off-by: Kirill Smelkov Signed-off-by: Linus Torvalds --- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +- fs/open.c | 18 ++ fs/read_write.c | 5 +- include/linux/fs.h | 4 + scripts/coccinelle/api/stream_open.cocci | 363 +++++++++++++++++++++++ 5 files changed, 389 insertions(+), 5 deletions(-) create mode 100644 scripts/coccinelle/api/stream_open.cocci diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index c3e201025ef0..0782ff3c2273 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) if (xen_store_evtchn == 0) return -ENOENT; - nonseekable_open(inode, filp); - - filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */ + stream_open(inode, filp); u = kzalloc(sizeof(*u), GFP_KERNEL); if (u == NULL) diff --git a/fs/open.c b/fs/open.c index f1c2f855fd43..a00350018a47 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL(nonseekable_open); + +/* + * stream_open is used by subsystems that want stream-like file descriptors. + * Such file descriptors are not seekable and don't have notion of position + * (file.f_pos is always 0). Contrary to file descriptors of other regular + * files, .read() and .write() can run simultaneously. + * + * stream_open never fails and is marked to return int so that it could be + * directly used as file_operations.open . + */ +int stream_open(struct inode *inode, struct file *filp) +{ + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); + filp->f_mode |= FMODE_STREAM; + return 0; +} + +EXPORT_SYMBOL(stream_open); diff --git a/fs/read_write.c b/fs/read_write.c index 177ccc3d405a..61b43ad7608e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ static inline loff_t file_pos_read(struct file *file) { - return file->f_pos; + return file->f_mode & FMODE_STREAM ? 0 : file->f_pos; } static inline void file_pos_write(struct file *file, loff_t pos) { - file->f_pos = pos; + if ((file->f_mode & FMODE_STREAM) == 0) + file->f_pos = pos; } ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b42df09b04c..dd28e7679089 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -158,6 +158,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_CREATED ((__force fmode_t)0x100000) +/* File is stream-like */ +#define FMODE_STREAM ((__force fmode_t)0x200000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) @@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); +extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci new file mode 100644 index 000000000000..350145da7669 --- /dev/null +++ b/scripts/coccinelle/api/stream_open.cocci @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0 +// Author: Kirill Smelkov (kirr@nexedi.com) +// +// Search for stream-like files that are using nonseekable_open and convert +// them to stream_open. A stream-like file is a file that does not use ppos in +// its read and write. Rationale for the conversion is to avoid deadlock in +// between read and write. + +virtual report +virtual patch +virtual explain // explain decisions in the patch (SPFLAGS="-D explain") + +// stream-like reader & writer - ones that do not depend on f_pos. +@ stream_reader @ +identifier readstream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + +@ stream_writer @ +identifier writestream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + + +// a function that blocks +@ blocks @ +identifier block_f; +identifier wait_event =~ "^wait_event_.*"; +@@ + block_f(...) { + ... when exists + wait_event(...) + ... when exists + } + +// stream_reader that can block inside. +// +// XXX wait_* can be called not directly from current function (e.g. func -> f -> g -> wait()) +// XXX currently reader_blocks supports only direct and 1-level indirect cases. +@ reader_blocks_direct @ +identifier stream_reader.readstream; +identifier wait_event =~ "^wait_event_.*"; +@@ + readstream(...) + { + ... when exists + wait_event(...) + ... when exists + } + +@ reader_blocks_1 @ +identifier stream_reader.readstream; +identifier blocks.block_f; +@@ + readstream(...) + { + ... when exists + block_f(...) + ... when exists + } + +@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @ +identifier stream_reader.readstream; +@@ + readstream(...) { + ... + } + + +// file_operations + whether they have _any_ .read, .write, .llseek ... at all. +// +// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c) +@ fops0 @ +identifier fops; +@@ + struct file_operations fops = { + ... + }; + +@ has_read @ +identifier fops0.fops; +identifier read_f; +@@ + struct file_operations fops = { + .read = read_f, + }; + +@ has_read_iter @ +identifier fops0.fops; +identifier read_iter_f; +@@ + struct file_operations fops = { + .read_iter = read_iter_f, + }; + +@ has_write @ +identifier fops0.fops; +identifier write_f; +@@ + struct file_operations fops = { + .write = write_f, + }; + +@ has_write_iter @ +identifier fops0.fops; +identifier write_iter_f; +@@ + struct file_operations fops = { + .write_iter = write_iter_f, + }; + +@ has_llseek @ +identifier fops0.fops; +identifier llseek_f; +@@ + struct file_operations fops = { + .llseek = llseek_f, + }; + +@ has_no_llseek @ +identifier fops0.fops; +@@ + struct file_operations fops = { + .llseek = no_llseek, + }; + +@ has_mmap @ +identifier fops0.fops; +identifier mmap_f; +@@ + struct file_operations fops = { + .mmap = mmap_f, + }; + +@ has_copy_file_range @ +identifier fops0.fops; +identifier copy_file_range_f; +@@ + struct file_operations fops = { + .copy_file_range = copy_file_range_f, + }; + +@ has_remap_file_range @ +identifier fops0.fops; +identifier remap_file_range_f; +@@ + struct file_operations fops = { + .remap_file_range = remap_file_range_f, + }; + +@ has_splice_read @ +identifier fops0.fops; +identifier splice_read_f; +@@ + struct file_operations fops = { + .splice_read = splice_read_f, + }; + +@ has_splice_write @ +identifier fops0.fops; +identifier splice_write_f; +@@ + struct file_operations fops = { + .splice_write = splice_write_f, + }; + + +// file_operations that is candidate for stream_open conversion - it does not +// use mmap and other methods that assume @offset access to file. +// +// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now. +// XXX maybe_steam.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops". +@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @ +identifier fops0.fops; +@@ + struct file_operations fops = { + }; + + +// ---- conversions ---- + +// XXX .open = nonseekable_open -> .open = stream_open +// XXX .open = func -> openfunc -> nonseekable_open + +// read & write +// +// if both are used in the same file_operations together with an opener - +// under that conditions we can use stream_open instead of nonseekable_open. +@ fops_rw depends on maybe_stream @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + .write = writestream, + }; + +@ report_rw depends on report @ +identifier fops_rw.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report && reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,)) + +@ script:python depends on report && !reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + + +@ explain_rw_deadlocked depends on explain && reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (was deadlock) */ + ...> + } + + +@ explain_rw_nodeadlock depends on explain && !reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (no direct deadlock) */ + ...> + } + +@ patch_rw depends on patch @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// read, but not write +@ fops_r depends on maybe_stream && !has_write @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + }; + +@ report_r depends on report @ +identifier fops_r.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_r.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_r depends on explain @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read only */ + ...> + } + +@ patch_r depends on patch @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// write, but not read +@ fops_w depends on maybe_stream && !has_read @ +identifier fops0.fops, openfunc; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .write = writestream, + }; + +@ report_w depends on report @ +identifier fops_w.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_w.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_w depends on explain @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* write only */ + ...> + } + +@ patch_w depends on patch @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// no read, no write - don't change anything From c2f8d7cb32cd95e3005bed58ce02afa686b9f357 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 18 Mar 2019 22:56:15 +0100 Subject: [PATCH 085/116] Revert: parisc: Use F_EXTEND() macro in iosapic code Revert parts of commit 97d7e2e3fd8a ("parisc: Use F_EXTEND() macro in iosapic code"). It breaks booting the 32-bit kernel on some machines. Reported-by: Sven Schnelle Tested-by: Sven Schnelle Fixes: 97d7e2e3fd8a ("parisc: Use F_EXTEND() macro in iosapic code") Signed-off-by: Helge Deller --- drivers/parisc/iosapic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 1be571c20062..6bad04cbb1d3 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -157,8 +157,12 @@ #define DBG_IRT(x...) #endif +#ifdef CONFIG_64BIT +#define COMPARE_IRTE_ADDR(irte, hpa) ((irte)->dest_iosapic_addr == (hpa)) +#else #define COMPARE_IRTE_ADDR(irte, hpa) \ - ((irte)->dest_iosapic_addr == F_EXTEND(hpa)) + ((irte)->dest_iosapic_addr == ((hpa) | 0xffffffff00000000ULL)) +#endif #define IOSAPIC_REG_SELECT 0x00 #define IOSAPIC_REG_WINDOW 0x10 From 45efd871bf0a47648f119d1b41467f70484de5bc Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 4 Apr 2019 18:16:03 +0200 Subject: [PATCH 086/116] parisc: regs_return_value() should return gpr28 While working on kretprobes for PA-RISC I was wondering while the kprobes sanity test always fails on kretprobes. This is caused by returning gpr20 instead of gpr28. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.14+ --- arch/parisc/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 2a27b275ab09..4a87b3d600c6 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -22,7 +22,7 @@ unsigned long profile_pc(struct pt_regs *); static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->gr[20]; + return regs->gr[28]; } static inline void instruction_pointer_set(struct pt_regs *regs, From f324fa58327791b2696628b31480e7e21c745706 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 4 Apr 2019 18:16:04 +0200 Subject: [PATCH 087/116] parisc: also set iaoq_b in instruction_pointer_set() When setting the instruction pointer on PA-RISC we also need to set the back of the instruction queue to the new offset, otherwise we will execute on instruction from the new location, and jumping back to the old location stored in iaoq_b. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Fixes: 75ebedf1d263 ("parisc: Add HAVE_REGS_AND_STACK_ACCESS_API feature") Cc: stable@vger.kernel.org # 4.19+ --- arch/parisc/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 4a87b3d600c6..9ff033d261ab 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -28,7 +28,8 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { - regs->iaoq[0] = val; + regs->iaoq[0] = val; + regs->iaoq[1] = val + 4; } /* Query offset/name of register from its name/offset */ From d006e95b5561f708d0385e9677ffe2c46f2ae345 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Apr 2019 12:13:27 +0200 Subject: [PATCH 088/116] parisc: Detect QEMU earlier in boot process While adding LASI support to QEMU, I noticed that the QEMU detection in the kernel happens much too late. For example, when a LASI chip is found by the kernel, it registers the LASI LED driver as well. But when we run on QEMU it makes sense to avoid spending unnecessary CPU cycles, so we need to access the running_on_QEMU flag earlier than before. This patch now makes the QEMU detection the fist task of the Linux kernel by moving it to where the kernel enters the C-coding. Fixes: 310d82784fb4 ("parisc: qemu idle sleep support") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v4.14+ --- arch/parisc/kernel/process.c | 6 ------ arch/parisc/kernel/setup.c | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index eb39e7e380d7..841db71958cd 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -210,12 +210,6 @@ void __cpuidle arch_cpu_idle(void) static int __init parisc_idle_init(void) { - const char *marker; - - /* check QEMU/SeaBIOS marker in PAGE0 */ - marker = (char *) &PAGE0->pad0; - running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); - if (!running_on_qemu) cpu_idle_poll_ctrl(1); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 15dd9e21be7e..d908058d05c1 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -397,6 +397,9 @@ void __init start_parisc(void) int ret, cpunum; struct pdc_coproc_cfg coproc_cfg; + /* check QEMU/SeaBIOS marker in PAGE0 */ + running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0); + cpunum = smp_processor_id(); init_cpu_topology(); From d7ee81ad09f072eab1681877fc71ec05f9c1ae92 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:12:48 +0300 Subject: [PATCH 089/116] NFC: nci: Add some bounds checking in nci_hci_cmd_received() This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands"). I'm not totally sure, but I think that commit description may have overstated the danger. I was under the impression that this data came from the firmware? If you can't trust your networking firmware, then you're already in trouble. Anyway, these days we add bounds checking where ever we can and we call it kernel hardening. Better safe than sorry. Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/nfc/nci/hci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; From 6491d698396fd5da4941980a35ca7c162a672016 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:13:51 +0300 Subject: [PATCH 090/116] nfc: nci: Potential off by one in ->pipes[] array This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") where we changed NFC_HCI_MAX_PIPES from 127 to 128. As the comment next to the define explains, the pipe identifier is 7 bits long. The highest possible pipe is 127, but the number of possible pipes is 128. As the code is now, then there is potential for an out of bounds array access: net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 'ndev->hci_dev->pipes[pipe]' '0-127 == 127' Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6..df5c69db68af 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128 struct nci_hci_gate { u8 gate; From ac0722f23ff5bc1b15e268564a4d56d35cd4a1b5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 18 Mar 2019 11:05:21 +0100 Subject: [PATCH 091/116] dt-bindings: cpu: Fix JSON schema Commit fd73403a4862 ("dt-bindings: arm: Add SMP enable-method for Milbeaut") added support for a new cpu enable-method, but did so using tabulations to ident. This is however invalid in the syntax, and resulted in a failure when trying to use that schemas for validation. Use spaces instead of tabs to indent to fix this. Fixes: fd73403a4862 ("dt-bindings: arm: Add SMP enable-method for Milbeaut") Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Acked-by: Sugaya Taichi Signed-off-by: Olof Johansson --- Documentation/devicetree/bindings/arm/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 365dcf384d73..82dd7582e945 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -228,7 +228,7 @@ patternProperties: - renesas,r9a06g032-smp - rockchip,rk3036-smp - rockchip,rk3066-smp - - socionext,milbeaut-m10v-smp + - socionext,milbeaut-m10v-smp - ste,dbx500-smp cpu-release-addr: From fbe8758f931ff5468aaeb4b304fc3edb70c908d6 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sun, 7 Apr 2019 15:18:41 -0700 Subject: [PATCH 092/116] Revert "ARM: dts: nomadik: Fix polarity of SPI CS" This reverts commit fa9463564e77067df81b0b8dec91adbbbc47bfb4. Per Linus Walleij: Dear ARM SoC maintainers, can you please revert this patch. It was the wrong solution to the wrong problem, and I must have acted in stress. Andrey fixed the real bug in a proper way in these commits: commit e5545c94e43b8f6599ffc01df8d1aedf18ee912a "gpio: of: Check propname before applying "cs-gpios" quirks" commit 7ce40277bf848391705011ba37eac2e377cbd9e6 "gpio: of: Check for "spi-cs-high" in child instead of parent node" Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-nomadik-nhk15.dts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts index f2f6558a00f1..04066f9cb8a3 100644 --- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts +++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts @@ -213,13 +213,12 @@ gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>; /* - * This chipselect is active high. Just setting the flags - * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings, - * it will be ignored, only the special "spi-cs-high" flag - * really counts. + * It's not actually active high, but the frameworks assume + * the polarity of the passed-in GPIO is "normal" (active + * high) then actively drives the line low to select the + * chip. */ cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; - spi-cs-high; num-chipselects = <1>; /* From cd92d74d67c811dc22544430b9ac3029f5bd64c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:42 +0100 Subject: [PATCH 093/116] ARM: orion: don't use using 64-bit DMA masks clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/plat-orion/common.c:625:29: error: shift count >= width of type [-Werror,-Wshift-count-overflow] .coherent_dma_mask = DMA_BIT_MASK(64), ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) The ones in orion shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/plat-orion/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index a6c81ce00f52..8647cb80a93b 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = { .resource = orion_xor0_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor0_pdata, }, }; @@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = { .resource = orion_xor1_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor1_pdata, }, }; From 2125801ccce19249708ca3245d48998e70569ab8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:43 +0100 Subject: [PATCH 094/116] ARM: iop: don't use using 64-bit DMA masks clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/mach-iop13xx/setup.c:303:35: error: shift count >= width of type [-Werror,-Wshift-count-overflow] static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) ^ ~~~ The ones in iop shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/mach-iop13xx/setup.c | 8 ++++---- arch/arm/mach-iop13xx/tpmi.c | 10 +++++----- arch/arm/plat-iop/adma.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 53c316f7301e..fe4932fda01d 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = { } }; -static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); +static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32); static struct iop_adma_platform_data iop13xx_adma_0_data = { .hw_id = 0, .pool_size = PAGE_SIZE, @@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = { .resource = iop13xx_adma_0_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_0_data, }, }; @@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = { .resource = iop13xx_adma_1_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_1_data, }, }; @@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = { .resource = iop13xx_adma_2_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_2_data, }, }; diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c index db511ec2b1df..116feb6b261e 100644 --- a/arch/arm/mach-iop13xx/tpmi.c +++ b/arch/arm/mach-iop13xx/tpmi.c @@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = { } }; -u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64); +u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32); static struct platform_device iop13xx_tpmi_0_device = { .name = "iop-tpmi", .id = 0, @@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = { .resource = iop13xx_tpmi_0_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = { .resource = iop13xx_tpmi_1_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = { .resource = iop13xx_tpmi_2_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = { .resource = iop13xx_tpmi_3_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index a4d1f8de3b5b..d9612221e484 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = { .resource = iop3xx_dma_0_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_0_data, }, }; @@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = { .resource = iop3xx_dma_1_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_1_data, }, }; @@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = { .resource = iop3xx_aau_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_aau_data, }, }; From 9a8f32038a74cb800e9649afbf4b3dba2b7d6539 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2019 22:19:16 +0100 Subject: [PATCH 095/116] ARM: milbeaut: fix build with !CONFIG_HOTPLUG_CPU When HOTPLUG_CPU is disabled, some fields in the smp operations are not available or needed: arch/arm/mach-milbeaut/platsmp.c:90:3: error: field designator 'cpu_die' does not refer to any field in type 'struct smp_operations' .cpu_die = m10v_cpu_die, ^ arch/arm/mach-milbeaut/platsmp.c:91:3: error: field designator 'cpu_kill' does not refer to any field in type 'struct smp_operations' .cpu_kill = m10v_cpu_kill, ^ Hide them in an #ifdef like the other platforms do. Fixes: 9fb29c734f9e ("ARM: milbeaut: Add basic support for Milbeaut m10v SoC") Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/mach-milbeaut/platsmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-milbeaut/platsmp.c b/arch/arm/mach-milbeaut/platsmp.c index 591543c81399..3ea880f5fcb7 100644 --- a/arch/arm/mach-milbeaut/platsmp.c +++ b/arch/arm/mach-milbeaut/platsmp.c @@ -65,6 +65,7 @@ static void m10v_smp_init(unsigned int max_cpus) writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4); } +#ifdef CONFIG_HOTPLUG_CPU static void m10v_cpu_die(unsigned int l_cpu) { gic_cpu_if_down(0); @@ -83,12 +84,15 @@ static int m10v_cpu_kill(unsigned int l_cpu) return 1; } +#endif static struct smp_operations m10v_smp_ops __initdata = { .smp_prepare_cpus = m10v_smp_init, .smp_boot_secondary = m10v_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = m10v_cpu_die, .cpu_kill = m10v_cpu_kill, +#endif }; CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops); From 15ade5d2e7775667cf191cf2f94327a4889f8b9d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Apr 2019 14:09:59 -1000 Subject: [PATCH 096/116] Linux 5.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 026fbc450906..15c8251d4d5e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Shy Crocodile # *DOCUMENTATION* From b959ecf8f953701a19970e5db7e427c05143f303 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 5 Apr 2019 14:20:24 +0200 Subject: [PATCH 097/116] selftests: add a tc matchall test case This is a follow up of the commit 0db6f8befc32 ("net/sched: fix ->get helper of the matchall cls"). To test it: $ cd tools/testing/selftests/tc-testing $ ln -s ../plugin-lib/nsPlugin.py plugins/20-nsPlugin.py $ ./tdc.py -n -e 2638 Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/tests.json | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 99a5ffca1088..2d096b2abf2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -18,6 +18,26 @@ "$TC qdisc del dev $DEV1 ingress" ] }, + { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, { "id": "d052", "name": "Add 1M filters with the same action", From fcf88917dd435c6a4cb2830cb086ee58605a1d85 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 6 Apr 2019 18:59:01 -0400 Subject: [PATCH 098/116] slab: fix a crash by reading /proc/slab_allocators The commit 510ded33e075 ("slab: implement slab_root_caches list") changes the name of the list node within "struct kmem_cache" from "list" to "root_caches_node", but leaks_show() still use the "list" which causes a crash when reading /proc/slab_allocators. You need to have CONFIG_SLAB=y and CONFIG_MEMCG=y to see the problem, because without MEMCG all slab caches are root caches, and the "list" node happens to be the right one. Fixes: 510ded33e075 ("slab: implement slab_root_caches list") Signed-off-by: Qian Cai Reviewed-by: Tobin C. Harding Cc: Tejun Heo Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 329bfe67f2ca..47a380a486ee 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4308,7 +4308,8 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, + root_caches_node); struct page *page; struct kmem_cache_node *n; const char *name; From 5055376a3b44c4021de8830c9157f086a97731df Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 8 Apr 2019 10:04:20 +0800 Subject: [PATCH 099/116] net: vrf: Fix ping failed when vrf mtu is set to 0 When the mtu of a vrf device is set to 0, it would cause ping failed. So I think we should limit vrf mtu in a reasonable range to solve this problem. I set dev->min_mtu to IPV6_MIN_MTU, so it will works for both ipv4 and ipv6. And if dev->max_mtu still be 0 can be confusing, so I set dev->max_mtu to ETH_MAX_MTU. Here is the reproduce step: 1.Config vrf interface and set mtu to 0: 3: enp4s0: mtu 1500 qdisc fq_codel master vrf1 state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff 2.Ping peer: 3: enp4s0: mtu 1500 qdisc fq_codel master vrf1 state UP group default qlen 1000 link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff inet 10.0.0.1/16 scope global enp4s0 valid_lft forever preferred_lft forever connect: Network is unreachable 3.Set mtu to default value, ping works: PING 10.0.0.2 (10.0.0.2) 56(84) bytes of data. 64 bytes from 10.0.0.2: icmp_seq=1 ttl=64 time=1.88 ms Fixes: ad49bc6361ca2 ("net: vrf: remove MTU limits for vrf device") Signed-off-by: Miaohe Lin Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6d1a1abbed27..cd15c32b2e43 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1275,8 +1275,12 @@ static void vrf_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; - dev->min_mtu = 0; - dev->max_mtu = 0; + /* VRF devices do not care about MTU, but if the MTU is set + * too low then the ipv4 and ipv6 protocols are disabled + * which breaks networking. + */ + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], From b1a6e8f9131381a92bfdacdf86ef80cca82f71d4 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Mon, 8 Apr 2019 18:08:04 +0200 Subject: [PATCH 100/116] MAINTAINERS: ieee802154: update documentation file pattern When moving the documentation for the ieee802154 subsystem from plain text to rst the file pattern in the MAINTAINERS file got wrong. Updating it here to fix scripts using this file. Reported-by: Joe Perches Signed-off-by: Stefan Schmidt Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6771bd784f5f..621622138860 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7515,7 +7515,7 @@ F: include/net/mac802154.h F: include/net/af_ieee802154.h F: include/net/cfg802154.h F: include/net/ieee802154_netdev.h -F: Documentation/networking/ieee802154.txt +F: Documentation/networking/ieee802154.rst IFE PROTOCOL M: Yotam Gigi From b75bb8a5b755d0c7bf1ac071e4df2349a7644a1e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 5 Apr 2019 20:46:46 +0200 Subject: [PATCH 101/116] r8169: disable ASPM again There's a significant number of reports that re-enabling ASPM causes different issues, ranging from decreased performance to system not booting at all. This affects only a minority of users, but the number of affected users is big enough that we better switch off ASPM again. This will hurt notebook users who are not affected by the issues, they may see decreased battery runtime w/o ASPM. With the PCI core folks is being discussed to add generic sysfs attributes to control ASPM. Once this is in place brave enough users can re-enable ASPM on their system. Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 19efa88f3f02..ed651dde6ef9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -7352,6 +7353,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; + /* Disable ASPM completely as that cause random device stop working + * problems as well as full system hangs for some PCIe devices users. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); if (rc < 0) { From e891db1a18bf11e02533ec2386b796cfd8d60666 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 22 Mar 2019 12:51:20 +0200 Subject: [PATCH 102/116] tpm: turn on TPM on suspend for TPM 1.x tpm_chip_start/stop() should be also called for TPM 1.x devices on suspend. Add that functionality back. Do not lock the chip because it is unnecessary as there are no multiple threads using it when doing the suspend. Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Reported-by: Paul Zimmerman Signed-off-by: Jarkko Sakkinen Tested-by: Domenico Andreoli Signed-off-by: James Morris --- drivers/char/tpm/tpm-interface.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 83ece5639f86..ae1030c9b086 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -402,15 +402,13 @@ int tpm_pm_suspend(struct device *dev) if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED) return 0; - if (chip->flags & TPM_CHIP_FLAG_TPM2) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + if (!tpm_chip_start(chip)) { + if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); - tpm_chip_stop(chip); - } - mutex_unlock(&chip->tpm_mutex); - } else { - rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + else + rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + + tpm_chip_stop(chip); } return rc; From 7110629263469b4664d00b38ef80a656eddf3637 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 27 Mar 2019 11:32:38 -0700 Subject: [PATCH 103/116] tpm: fix an invalid condition in tpm_common_poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The poll condition should only check response_length, because reads should only be issued if there is data to read. The response_read flag only prevents double writes. The problem was that the write set the response_read to false, enqued a tpm job, and returned. Then application called poll which checked the response_read flag and returned EPOLLIN. Then the application called read, but got nothing. After all that the async_work kicked in. Added also mutex_lock around the poll check to prevent other possible race conditions. Fixes: 9488585b21bef0df12 ("tpm: add support for partial reads") Reported-by: Mantas Mikulėnas Tested-by: Mantas Mikulėnas Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm-dev-common.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 8856cce5a23b..817ae09a369e 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -233,12 +233,19 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &priv->async_wait, wait); + mutex_lock(&priv->buffer_mutex); - if (!priv->response_read || priv->response_length) + /* + * The response_length indicates if there is still response + * (or part of it) to be consumed. Partial reads decrease it + * by the number of bytes read, and write resets it the zero. + */ + if (priv->response_length) mask = EPOLLIN | EPOLLRDNORM; else mask = EPOLLOUT | EPOLLWRNORM; + mutex_unlock(&priv->buffer_mutex); return mask; } From c78719203fc629421a0d91d3d22240c36ae0119c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 25 Mar 2019 16:43:10 +0200 Subject: [PATCH 104/116] KEYS: trusted: allow trusted.ko to initialize w/o a TPM Allow trusted.ko to initialize w/o a TPM. This commit also adds checks to the exported functions to fail when a TPM is not available. Fixes: 240730437deb ("KEYS: trusted: explicitly use tpm_chip structure...") Cc: James Morris Reported-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- security/keys/trusted.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/security/keys/trusted.c b/security/keys/trusted.c index bcc9c6ead7fd..a284d790de8c 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -135,6 +135,9 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, int ret; va_list argp; + if (!chip) + return -ENODEV; + sdesc = init_sdesc(hashalg); if (IS_ERR(sdesc)) { pr_info("trusted_key: can't alloc %s\n", hash_alg); @@ -196,6 +199,9 @@ int TSS_checkhmac1(unsigned char *buffer, va_list argp; int ret; + if (!chip) + return -ENODEV; + bufsize = LOAD32(buffer, TPM_SIZE_OFFSET); tag = LOAD16(buffer, 0); ordinal = command; @@ -363,6 +369,9 @@ int trusted_tpm_send(unsigned char *cmd, size_t buflen) { int rc; + if (!chip) + return -ENODEV; + dump_tpm_buf(cmd); rc = tpm_send(chip, cmd, buflen); dump_tpm_buf(cmd); @@ -429,6 +438,9 @@ int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce) { int ret; + if (!chip) + return -ENODEV; + INIT_BUF(tb); store16(tb, TPM_TAG_RQU_COMMAND); store32(tb, TPM_OIAP_SIZE); @@ -1245,9 +1257,13 @@ static int __init init_trusted(void) { int ret; + /* encrypted_keys.ko depends on successful load of this module even if + * TPM is not used. + */ chip = tpm_default_chip(); if (!chip) - return -ENOENT; + return 0; + ret = init_digests(); if (ret < 0) goto err_put; @@ -1269,10 +1285,12 @@ err_put: static void __exit cleanup_trusted(void) { - put_device(&chip->dev); - kfree(digests); - trusted_shash_release(); - unregister_key_type(&key_type_trusted); + if (chip) { + put_device(&chip->dev); + kfree(digests); + trusted_shash_release(); + unregister_key_type(&key_type_trusted); + } } late_initcall(init_trusted); From b9d0a85d6b2e76630cfd4c475ee3af4109bfd87a Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 20 Feb 2019 16:25:38 +0800 Subject: [PATCH 105/116] tpm: Fix the type of the return value in calc_tpm2_event_size() calc_tpm2_event_size() has an invalid signature because it returns a 'size_t' where as its signature says that it returns 'int'. Cc: Fixes: 4d23cc323cdb ("tpm: add securityfs support for TPM 2.0 firmware event log") Suggested-by: Jarkko Sakkinen Signed-off-by: Yue Haibing Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/eventlog/tpm2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c index d8b77133a83a..f824563fc28d 100644 --- a/drivers/char/tpm/eventlog/tpm2.c +++ b/drivers/char/tpm/eventlog/tpm2.c @@ -37,8 +37,8 @@ * * Returns size of the event. If it is an invalid event, returns 0. */ -static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event, - struct tcg_pcr_event *event_header) +static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event, + struct tcg_pcr_event *event_header) { struct tcg_efi_specid_event_head *efispecid; struct tcg_event_field *event_field; From be24b37e22c20cbaa891971616784dd0f35211e8 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 22 Oct 2018 16:43:57 -0700 Subject: [PATCH 106/116] KEYS: trusted: fix -Wvarags warning Fixes the warning reported by Clang: security/keys/trusted.c:146:17: warning: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Wvarargs] va_start(argp, h3); ^ security/keys/trusted.c:126:37: note: parameter of type 'unsigned char' is declared here unsigned char *h2, unsigned char h3, ...) ^ Specifically, it seems that both the C90 (4.8.1.1) and C11 (7.16.1.4) standards explicitly call this out as undefined behavior: The parameter parmN is the identifier of the rightmost parameter in the variable parameter list in the function definition (the one just before the ...). If the parameter parmN is declared with ... or with a type that is not compatible with the type that results after application of the default argument promotions, the behavior is undefined. Link: https://github.com/ClangBuiltLinux/linux/issues/41 Link: https://www.eskimo.com/~scs/cclass/int/sx11c.html Suggested-by: David Laight Suggested-by: Denis Kenzior Suggested-by: James Bottomley Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- include/keys/trusted.h | 2 +- security/keys/trusted.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/keys/trusted.h b/include/keys/trusted.h index adbcb6817826..0071298b9b28 100644 --- a/include/keys/trusted.h +++ b/include/keys/trusted.h @@ -38,7 +38,7 @@ enum { int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...); + unsigned char *h2, unsigned int h3, ...); int TSS_checkhmac1(unsigned char *buffer, const uint32_t command, const unsigned char *ononce, diff --git a/security/keys/trusted.c b/security/keys/trusted.c index a284d790de8c..efdbf17f3915 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -125,7 +125,7 @@ out: */ int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...) + unsigned char *h2, unsigned int h3, ...) { unsigned char paramdigest[SHA1_DIGEST_SIZE]; struct sdesc *sdesc; @@ -144,7 +144,7 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, return PTR_ERR(sdesc); } - c = h3; + c = !!h3; ret = crypto_shash_init(&sdesc->shash); if (ret < 0) goto out; From f1a0ba6cccff75d882204cae1f154f17620b3c4a Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 12 Feb 2019 15:42:10 -0800 Subject: [PATCH 107/116] selftests/tpm2: Extend tests to cover partial reads Three new tests added: 1. Send get random cmd, read header in 1st read, read the rest in second read - expect success 2. Send get random cmd, read only part of the response, send another get random command, read the response - expect success 3. Send get random cmd followed by another get random cmd, without reading the first response - expect the second cmd to fail with -EBUSY Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- tools/testing/selftests/tpm2/tpm2.py | 1 + tools/testing/selftests/tpm2/tpm2_tests.py | 63 ++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 40ea95ce2ead..6fc99ce025b5 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -22,6 +22,7 @@ TPM2_CC_UNSEAL = 0x015E TPM2_CC_FLUSH_CONTEXT = 0x0165 TPM2_CC_START_AUTH_SESSION = 0x0176 TPM2_CC_GET_CAPABILITY = 0x017A +TPM2_CC_GET_RANDOM = 0x017B TPM2_CC_PCR_READ = 0x017E TPM2_CC_POLICY_PCR = 0x017F TPM2_CC_PCR_EXTEND = 0x0182 diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 3bb066fea4a0..d4973be53493 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -158,6 +158,69 @@ class SmokeTest(unittest.TestCase): pass self.assertEqual(rejected, True) + def test_read_partial_resp(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + hdr = self.client.tpm.read(10) + sz = struct.unpack('>I', hdr[2:6])[0] + rsp = self.client.tpm.read() + except: + pass + self.assertEqual(sz, 10 + 2 + 32) + self.assertEqual(len(rsp), 2 + 32) + + def test_read_partial_overwrite(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + # Read part of the respone + rsp1 = self.client.tpm.read(15) + + # Send a new cmd + self.client.tpm.write(cmd) + + # Read the whole respone + rsp2 = self.client.tpm.read() + except: + pass + self.assertEqual(len(rsp1), 15) + self.assertEqual(len(rsp2), 10 + 2 + 32) + + def test_send_two_cmds(self): + rejected = False + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + + # expect the second one to raise -EBUSY error + self.client.tpm.write(cmd) + rsp = self.client.tpm.read() + + except IOError, e: + # read the response + rsp = self.client.tpm.read() + rejected = True + pass + except: + pass + self.assertEqual(rejected, True) + class SpaceTest(unittest.TestCase): def setUp(self): logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG) From 6da70580af9612accf042b37564d73e787af39b4 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 12 Feb 2019 15:42:05 -0800 Subject: [PATCH 108/116] selftests/tpm2: Open tpm dev in unbuffered mode In order to have control over how many bytes are read or written the device needs to be opened in unbuffered mode. Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- tools/testing/selftests/tpm2/tpm2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 6fc99ce025b5..828c18584624 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -358,9 +358,9 @@ class Client: self.flags = flags if (self.flags & Client.FLAG_SPACE) == 0: - self.tpm = open('/dev/tpm0', 'r+b') + self.tpm = open('/dev/tpm0', 'r+b', buffering=0) else: - self.tpm = open('/dev/tpmrm0', 'r+b') + self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0) def close(self): self.tpm.close() From 492b67e28ee5f2a2522fb72e3d3bcb990e461514 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 6 Apr 2019 17:16:52 +0200 Subject: [PATCH 109/116] net: ip_gre: fix possible use-after-free in erspan_rcv erspan tunnels run __iptunnel_pull_header on received skbs to remove gre and erspan headers. This can determine a possible use-after-free accessing pkt_md pointer in erspan_rcv since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device). Fix it resetting pkt_md pointer after __iptunnel_pull_header Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fd219f7bd3ea..4b0526441476 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), @@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, goto drop; if (tunnel->collect_md) { + struct erspan_metadata *pkt_md, *md; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); md = ip_tunnel_info_opts(&tun_dst->u.tun_info); md->version = ver; md2 = &md->u.md2; From 2a3cabae4536edbcb21d344e7aa8be7a584d2afb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 6 Apr 2019 17:16:53 +0200 Subject: [PATCH 110/116] net: ip6_gre: fix possible use-after-free in ip6erspan_rcv erspan_v6 tunnels run __iptunnel_pull_header on received skbs to remove erspan header. This can determine a possible use-after-free accessing pkt_md pointer in ip6erspan_rcv since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device). Fix it resetting pkt_md pointer after __iptunnel_pull_header Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b32c95f02128..655e46b227f9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) } static int ip6erspan_rcv(struct sk_buff *skb, - struct tnl_ptk_info *tpi) + struct tnl_ptk_info *tpi, + int gre_hdr_len) { struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; const struct ipv6hdr *ipv6h; struct erspan_md2 *md2; struct ip6_tnl *tunnel; @@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)skb->data; - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), false, false) < 0) return PACKET_REJECT; if (tunnel->parms.collect_md) { + struct erspan_metadata *pkt_md, *md; struct metadata_dst *tun_dst; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); md->version = ver; @@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; goto out; } From afe64245af9f58267e7fa8fb76ad5650ee7ec25f Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:45 -0400 Subject: [PATCH 111/116] ethtool: avoid signed-unsigned comparison in ethtool_validate_speed() When building C++ userspace code that includes ethtool.h with "-Werror -Wall", g++ complains about signed-unsigned comparison in ethtool_validate_speed() due to definition of SPEED_UNKNOWN as -1. Explicitly cast SPEED_UNKNOWN to __u32 to match type of ethtool_validate_speed() argument. Signed-off-by: Michael Zhivich Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3652b239dad1..d473e5ed044c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1591,7 +1591,7 @@ enum ethtool_link_mode_bit_indices { static inline int ethtool_validate_speed(__u32 speed) { - return speed <= INT_MAX || speed == SPEED_UNKNOWN; + return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN; } /* Duplex, half or full. */ From caf2c5205d82ff0d758096a69a7e0466c38d4dbb Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:46 -0400 Subject: [PATCH 112/116] broadcom: tg3: fix use of SPEED_UNKNOWN ethtool constant tg3 driver uses u16 to store SPEED_UKNOWN ethtool constant, which is defined as -1, resulting in value truncation and thus incorrect test results against SPEED_UNKNOWN. For example, the following test will print "False": u16 speed = SPEED_UNKNOWN; if (speed == SPEED_UNKNOWN) printf("True"); else printf("False"); Change storage of speed to use u32 to avoid this issue. Signed-off-by: Michael Zhivich Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- drivers/net/ethernet/broadcom/tg3.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 328373e0578f..060a6f386104 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4283,7 +4283,7 @@ static void tg3_power_down(struct tg3 *tp) pci_set_power_state(tp->pdev, PCI_D3hot); } -static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex) +static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex) { switch (val & MII_TG3_AUX_STAT_SPDMASK) { case MII_TG3_AUX_STAT_10HALF: @@ -4787,7 +4787,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, bool force_reset) bool current_link_up; u32 bmsr, val; u32 lcl_adv, rmt_adv; - u16 current_speed; + u32 current_speed; u8 current_duplex; int i, err; @@ -5719,7 +5719,7 @@ out: static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset) { u32 orig_pause_cfg; - u16 orig_active_speed; + u32 orig_active_speed; u8 orig_active_duplex; u32 mac_status; bool current_link_up; @@ -5823,7 +5823,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) { int err = 0; u32 bmsr, bmcr; - u16 current_speed = SPEED_UNKNOWN; + u32 current_speed = SPEED_UNKNOWN; u8 current_duplex = DUPLEX_UNKNOWN; bool current_link_up = false; u32 local_adv, remote_adv, sgsr; diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index a772a33b685c..6953d0546acb 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2873,7 +2873,7 @@ struct tg3_tx_ring_info { struct tg3_link_config { /* Describes what we're trying to get. */ u32 advertising; - u16 speed; + u32 speed; u8 duplex; u8 autoneg; u8 flowctrl; @@ -2882,7 +2882,7 @@ struct tg3_link_config { u8 active_flowctrl; u8 active_duplex; - u16 active_speed; + u32 active_speed; u32 rmt_adv; }; From d63da85a4226c4b5a28536a1f48d89eefd50a832 Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:47 -0400 Subject: [PATCH 113/116] qlogic: qlcnic: fix use of SPEED_UNKNOWN ethtool constant qlcnic driver uses u16 to store SPEED_UKNOWN ethtool constant, which is defined as -1, resulting in value truncation and thus incorrect test results against SPEED_UNKNOWN. For example, the following test will print "False": u16 speed = SPEED_UNKNOWN; if (speed == SPEED_UNKNOWN) printf("True"); else printf("False"); Change storage of speed to use u32 to avoid this issue. Signed-off-by: Michael Zhivich Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 0c443ea98479..374a4d4371f9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -497,7 +497,7 @@ struct qlcnic_hardware_context { u16 board_type; u16 supported_type; - u16 link_speed; + u32 link_speed; u16 link_duplex; u16 link_autoneg; u16 module_type; From a62520473f15750cd1432d36b377a06cd7cff8d2 Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Mon, 8 Apr 2019 15:37:54 -0400 Subject: [PATCH 114/116] net: macb driver, check for SKBTX_HW_TSTAMP Make sure SKBTX_HW_TSTAMP (i.e. SOF_TIMESTAMPING_TX_HARDWARE) has been enabled for this skb. It does fix the issue where normal socks that aren't expecting a timestamp will not wake up on select, but when a user does want a SOF_TIMESTAMPING_TX_HARDWARE it does work. Signed-off-by: Paul Thomas Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1522aee81884..3da2795e2486 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -898,7 +898,9 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { - if (gem_ptp_do_txstamp(queue, skb, desc) == 0) { + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP) && + gem_ptp_do_txstamp(queue, skb, desc) == 0) { /* skb now belongs to timestamp buffer * and will be removed later */ From a1b0e4e684e9c300b9e759b46cb7a0147e61ddff Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Apr 2019 17:39:54 -0400 Subject: [PATCH 115/116] bnxt_en: Improve RX consumer index validity check. There is logic to check that the RX/TPA consumer index is the expected index to work around a hardware problem. However, the potentially bad consumer index is first used to index into an array to reference an entry. This can potentially crash if the bad consumer index is beyond legal range. Improve the logic to use the consumer index for dereferencing after the validity check and log an error message. Fixes: fa7e28127a5a ("bnxt_en: Add workaround to detect bad opaque in rx completion (part 2)") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0bb9d7b3a2b6..3df847b7079f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1133,6 +1133,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, tpa_info = &rxr->rx_tpa[agg_id]; if (unlikely(cons != rxr->rx_next_cons)) { + netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return; } @@ -1585,15 +1587,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } cons = rxcmp->rx_cmp_opaque; - rx_buf = &rxr->rx_buf_ring[cons]; - data = rx_buf->data; - data_ptr = rx_buf->data_ptr; if (unlikely(cons != rxr->rx_next_cons)) { int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp); + netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return rc1; } + rx_buf = &rxr->rx_buf_ring[cons]; + data = rx_buf->data; + data_ptr = rx_buf->data_ptr; prefetch(data_ptr); misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1); From 8e44e96c6c8e8fb80b84a2ca11798a8554f710f2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Apr 2019 17:39:55 -0400 Subject: [PATCH 116/116] bnxt_en: Reset device on RX buffer errors. If the RX completion indicates RX buffers errors, the RX ring will be disabled by firmware and no packets will be received on that ring from that point on. Recover by resetting the device. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3df847b7079f..4c586ba4364b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1614,11 +1614,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rx_buf->data = NULL; if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) { + u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2); + bnxt_reuse_rx_data(rxr, cons, data); if (agg_bufs) bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); rc = -EIO; + if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { + netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); + bnxt_sched_reset(bp, rxr); + } goto next_rx; }