From f6d5b33125c4fa63c16f7f54c533338c9695d82c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 29 Mar 2011 18:00:27 -0700 Subject: [PATCH 001/200] RTC: Fix early irqs caused by calling rtc_set_alarm too early When we register an rtc device at boot, we read the alarm value in hardware and set the rtc device's aie_timer to that value. The initial method to do this was to simply call rtc_set_alarm() with the value read from hardware. However, this may cause problems as rtc_set_alarm may enable interupts, and the RTC alarm might fire, which can cause invalid pointer dereferencing since the RTC registration is not complete. This patch solves the issue by initializing the rtc_device.aie_timer y hand via rtc_initialize_alarm(). This avoids any calls to the RTC hardware which might enable interrupts too early. CC: Thomas Gleixner CC: Alessandro Zummo Reported-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk Signed-off-by: John Stultz --- drivers/rtc/class.c | 2 +- drivers/rtc/interface.c | 26 ++++++++++++++++++++++++++ include/linux/rtc.h | 2 ++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 09b4437b3e61..39013867cbd6 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -171,7 +171,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, err = __rtc_read_alarm(rtc, &alrm); if (!err && !rtc_valid_tm(&alrm.time)) - rtc_set_alarm(rtc, &alrm); + rtc_initialize_alarm(rtc, &alrm); strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); dev_set_name(&rtc->dev, "rtc%d", id); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 8ec6b069a7f5..b2fea80dfb65 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -375,6 +375,32 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } EXPORT_SYMBOL_GPL(rtc_set_alarm); +/* Called once per device from rtc_device_register */ +int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) +{ + int err; + + err = rtc_valid_tm(&alarm->time); + if (err != 0) + return err; + + err = mutex_lock_interruptible(&rtc->ops_lock); + if (err) + return err; + + rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); + rtc->aie_timer.period = ktime_set(0, 0); + if (alarm->enabled) { + rtc->aie_timer.enabled = 1; + timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node); + } + mutex_unlock(&rtc->ops_lock); + return err; +} +EXPORT_SYMBOL_GPL(rtc_initialize_alarm); + + + int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err = mutex_lock_interruptible(&rtc->ops_lock); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 2ca7e8a78060..877ece45426f 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -228,6 +228,8 @@ extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); extern int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); +extern int rtc_initialize_alarm(struct rtc_device *rtc, + struct rtc_wkalrm *alrm); extern void rtc_update_irq(struct rtc_device *rtc, unsigned long num, unsigned long events); From a54aba87bb8e90f9e39bcfe151717b86abbbdd79 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Thu, 24 Mar 2011 22:09:33 +0200 Subject: [PATCH 002/200] RTC: Fix s3c compile error due to missing s3c_rtc_setpie s3c_rtc_setpie was removed, and it resulted in compiler error: drivers/rtc/rtc-s3c.c: In function s3c_rtc_release drivers/rtc/rtc-s3c.c:339:2: error: implicit declaration of function s3c_rtc_setpie Fix it by removing s3c_rtc_release calls. [jstultz: An identical fix was also sent in by Jiri Pinkava ] CC: Thomas Gleixner CC: Alessandro Zummo Signed-off-by: Vasily Khoruzhick Signed-off-by: John Stultz --- drivers/rtc/rtc-s3c.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 714964913e5e..b3466c491cd3 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -336,7 +336,6 @@ static void s3c_rtc_release(struct device *dev) /* do not clear AIE here, it may be needed for wake */ - s3c_rtc_setpie(dev, 0); free_irq(s3c_rtc_alarmno, rtc_dev); free_irq(s3c_rtc_tickno, rtc_dev); } @@ -408,7 +407,6 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev) platform_set_drvdata(dev, NULL); rtc_device_unregister(rtc); - s3c_rtc_setpie(&dev->dev, 0); s3c_rtc_setaie(&dev->dev, 0); clk_disable(rtc_clk); From 8c122b96866580c99e44f3f07ac93a993d964ec3 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 18 Mar 2011 04:26:24 -0400 Subject: [PATCH 003/200] RTC: add missing "return 0" in new alarm func for rtc-bfin.c The new bfin_rtc_alarm_irq_enable function forgot to add a "return 0" to the end leading to the build warning: drivers/rtc/rtc-bfin.c: In function 'bfin_rtc_alarm_irq_enable': drivers/rtc/rtc-bfin.c:253: warning: control reaches end of non-void function CC: stable@kernel.org CC: Thomas Gleixner CC: Alessandro Zummo Signed-off-by: Mike Frysinger Signed-off-by: John Stultz --- drivers/rtc/rtc-bfin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index ca9cff85ab8a..f2496440d076 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -250,6 +250,8 @@ static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) bfin_rtc_int_set_alarm(rtc); else bfin_rtc_int_clear(~(RTC_ISTAT_ALARM | RTC_ISTAT_ALARM_DAY)); + + return 0; } static int bfin_rtc_read_time(struct device *dev, struct rtc_time *tm) From c8309ef6a4c52919d44bbc9743d7ea05ae8f4c7f Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 30 Mar 2011 00:24:43 -0700 Subject: [PATCH 004/200] ARM: tegra: gpio: Fix unused variable warnings Change b0f18edaf6ee4e6fac89cae63a90bd38ad2a3418 (arm: tegra: Remove unused bogus irq enable/disable magic) introduces warnings: arch/arm/mach-tegra/gpio.c: In function 'tegra_gpio_resume': arch/arm/mach-tegra/gpio.c:260: warning: unused variable 'i' arch/arm/mach-tegra/gpio.c: In function 'tegra_gpio_suspend': arch/arm/mach-tegra/gpio.c:283: warning: unused variable 'i' Fix them, and fix a coding style issue on the same lines. Signed-off-by: Colin Cross Acked-by: Erik Gilling --- arch/arm/mach-tegra/gpio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/gpio.c b/arch/arm/mach-tegra/gpio.c index 76a3f654220f..65a1aba6823d 100644 --- a/arch/arm/mach-tegra/gpio.c +++ b/arch/arm/mach-tegra/gpio.c @@ -257,7 +257,8 @@ static void tegra_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) void tegra_gpio_resume(void) { unsigned long flags; - int b, p, i; + int b; + int p; local_irq_save(flags); @@ -280,7 +281,8 @@ void tegra_gpio_resume(void) void tegra_gpio_suspend(void) { unsigned long flags; - int b, p, i; + int b; + int p; local_irq_save(flags); for (b = 0; b < ARRAY_SIZE(tegra_gpio_banks); b++) { From 62f0988ee5280ac03f787e3abd70bd91366e3778 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 29 Mar 2011 11:48:45 -0700 Subject: [PATCH 005/200] msm: Remove extraneous ffa device check The qsd8x50 board file contains a few references to machine_is_... macros that are otherwise unused, and contain no machine definition. The recent purge of unused machine definitions breaks the compilation of this target. Since the machine cannot ever be used, just remove the bogus checks. Signed-off-by: David Brown --- arch/arm/mach-msm/board-qsd8x50.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c index 7f568611547e..6a96911b0ad5 100644 --- a/arch/arm/mach-msm/board-qsd8x50.c +++ b/arch/arm/mach-msm/board-qsd8x50.c @@ -160,10 +160,7 @@ static struct msm_mmc_platform_data qsd8x50_sdc1_data = { static void __init qsd8x50_init_mmc(void) { - if (machine_is_qsd8x50_ffa() || machine_is_qsd8x50a_ffa()) - vreg_mmc = vreg_get(NULL, "gp6"); - else - vreg_mmc = vreg_get(NULL, "gp5"); + vreg_mmc = vreg_get(NULL, "gp5"); if (IS_ERR(vreg_mmc)) { pr_err("vreg get for vreg_mmc failed (%ld)\n", From 893b66c39da812e7dd0d7b32aa0633e5d90d950c Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 30 Mar 2011 11:26:57 -0700 Subject: [PATCH 006/200] msm: timer: fix missing return value Change af90f10d38 "ARM: 6759/1: smp: Select local timers vs broadcast timer support runtime" missed a return statement, causing a compile warning: arch/arm/mach-msm/timer.c:272: warning: 'return' with no value, in function returning non-void Trivially return 0 for success when running on cpu 0 (to match the comment and previous behavior). Signed-off-by: David Brown --- arch/arm/mach-msm/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 56f920c55b6a..38b95e949d13 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -269,7 +269,7 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt) /* Use existing clock_event for cpu 0 */ if (!smp_processor_id()) - return; + return 0; writel(DGT_CLK_CTL_DIV_4, MSM_TMR_BASE + DGT_CLK_CTL); From 906c3b616dcf8e64b11d7d665d62f5e9940f4d46 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Wed, 30 Mar 2011 15:59:11 +0200 Subject: [PATCH 007/200] arm: tegra: fix error check in tegra2_clocks.c Checking 'rate < 0' doesn't work because 'rate' is unsigned. Signed-off-by: Nicolas Kaiser Signed-off-by: Colin Cross --- arch/arm/mach-tegra/tegra2_clocks.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c index 6d7c4eea4dcb..4459470c052d 100644 --- a/arch/arm/mach-tegra/tegra2_clocks.c +++ b/arch/arm/mach-tegra/tegra2_clocks.c @@ -1362,14 +1362,15 @@ static int tegra_clk_shared_bus_set_rate(struct clk *c, unsigned long rate) { unsigned long flags; int ret; + long new_rate = rate; - rate = clk_round_rate(c->parent, rate); - if (rate < 0) - return rate; + new_rate = clk_round_rate(c->parent, new_rate); + if (new_rate < 0) + return new_rate; spin_lock_irqsave(&c->parent->spinlock, flags); - c->u.shared_bus_user.rate = rate; + c->u.shared_bus_user.rate = new_rate; ret = tegra_clk_shared_bus_update(c->parent); spin_unlock_irqrestore(&c->parent->spinlock, flags); From 29ea23ff905d07d8559bac69cca46f4bbf20038c Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 2 Apr 2011 10:08:55 +0100 Subject: [PATCH 008/200] ARM: Make consolidated PM sleep code depend on PM_SLEEP CONFIG_PM is now set whenever we support either runtime PM in addition to suspend and hibernate. This causes build errors when runtime PM is enabled on a platform, but the CPU does not have the appropriate support for suspend. So, switch this code to use CONFIG_PM_SLEEP rather than CONFIG_PM to allow runtime PM to be enabled without causing build errors. Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-v7.S | 2 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 74554f1742d7..8d95446150a3 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o -obj-$(CONFIG_PM) += sleep.o +obj-$(CONFIG_PM_SLEEP) += sleep.o obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_SMP) += smp.o smp_tlb.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 219980ec8b6e..394b623b0928 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -390,7 +390,7 @@ ENTRY(cpu_arm920_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size .equ cpu_arm920_suspend_size, 4 * 3 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_arm920_do_suspend) stmfd sp!, {r4 - r7, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 6a4bdb2c94a7..0ed85d930c09 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -404,7 +404,7 @@ ENTRY(cpu_arm926_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size .equ cpu_arm926_suspend_size, 4 * 3 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_arm926_do_suspend) stmfd sp!, {r4 - r7, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 74483d1977fe..184a9c997e36 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -171,7 +171,7 @@ ENTRY(cpu_sa1100_set_pte_ext) .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4*4 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_sa1100_do_suspend) stmfd sp!, {r4 - r7, lr} mrc p15, 0, r4, c3, c0, 0 @ domain ID diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 832b6bdc192c..387441269cc6 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -124,7 +124,7 @@ ENTRY(cpu_v6_set_pte_ext) /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size .equ cpu_v6_suspend_size, 4 * 8 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r11, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 262fa88a7439..713cea199ea8 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -211,7 +211,7 @@ cpu_v7_name: /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ .globl cpu_v7_suspend_size .equ cpu_v7_suspend_size, 4 * 8 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_v7_do_suspend) stmfd sp!, {r4 - r11, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 63d8b2044e84..596213699f37 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -417,7 +417,7 @@ ENTRY(cpu_xsc3_set_pte_ext) .globl cpu_xsc3_suspend_size .equ cpu_xsc3_suspend_size, 4 * 8 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_xsc3_do_suspend) stmfd sp!, {r4 - r10, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 086038cd86ab..ce233bcbf506 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -518,7 +518,7 @@ ENTRY(cpu_xscale_set_pte_ext) .globl cpu_xscale_suspend_size .equ cpu_xscale_suspend_size, 4 * 7 -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP ENTRY(cpu_xscale_do_suspend) stmfd sp!, {r4 - r10, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode From 6a7861825f79f09213ef81b3c468f6f2e86f408e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 2 Apr 2011 10:15:28 +0100 Subject: [PATCH 009/200] ARM: Only allow PM_SLEEP with CPUs which support suspend Offering CONFIG_PM_SLEEP for CPUs which do not support suspend leads to build errors, so only set CONFIG_ARCH_SUSPEND_POSSIBLE if we have a CPU selected which supports suspend. Signed-off-by: Russell King --- arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5b9f78b570e8..9954c9bef679 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2012,6 +2012,8 @@ source "kernel/power/Kconfig" config ARCH_SUSPEND_POSSIBLE depends on !ARCH_S5P64X0 && !ARCH_S5P6442 + depends on CPU_ARM920T || CPU_ARM926T || CPU_SA1100 || \ + CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE def_bool y endmenu From 77f38e0eeac290827f41fd2215ab82546b8f73b8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 6 Apr 2011 09:09:16 -0700 Subject: [PATCH 010/200] libceph: fix linger request requeueing Fix the request transition from linger -> normal request. The key is to preserve r_osd and requeue on the same OSD. Reregister as a normal request, add the request to the proper queues, then unregister the linger. Fix the unregister helper to avoid clearing r_osd (and also simplify the parallel check in __unregister_request()). Reported-by: Henry Chang Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3b91d651fe08..9204de484c2e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -579,9 +579,15 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, r_linger_osd) { - __unregister_linger_request(osdc, req); + /* + * reregister request prior to unregistering linger so + * that r_osd is preserved. + */ + BUG_ON(!list_empty(&req->r_req_lru_item)); __register_request(osdc, req); - list_move(&req->r_req_lru_item, &osdc->req_unsent); + list_add(&req->r_req_lru_item, &osdc->req_unsent); + list_add(&req->r_osd_item, &req->r_osd->o_requests); + __unregister_linger_request(osdc, req); dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); } @@ -798,7 +804,7 @@ static void __register_request(struct ceph_osd_client *osdc, req->r_request->hdr.tid = cpu_to_le64(req->r_tid); INIT_LIST_HEAD(&req->r_req_lru_item); - dout("register_request %p tid %lld\n", req, req->r_tid); + dout("__register_request %p tid %lld\n", req, req->r_tid); __insert_request(osdc, req); ceph_osdc_get_request(req); osdc->num_requests++; From 8f74c0661c42104b3e3d2c032bc61efde15360ad Mon Sep 17 00:00:00 2001 From: Shubhrajyoti D Date: Wed, 6 Apr 2011 15:31:22 -0700 Subject: [PATCH 011/200] Input: twl4030_keypad - avoid potential NULL-pointer dereference Signed-off-by: Shubhrajyoti D Acked-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/twl4030_keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c index 09bef79d9da1..cc06c4b2f920 100644 --- a/drivers/input/keyboard/twl4030_keypad.c +++ b/drivers/input/keyboard/twl4030_keypad.c @@ -338,7 +338,7 @@ static int __devinit twl4030_kp_probe(struct platform_device *pdev) u8 reg; int error; - if (!pdata || !pdata->rows || !pdata->cols || + if (!pdata || !pdata->rows || !pdata->cols || !pdata->keymap_data || pdata->rows > TWL4030_MAX_ROWS || pdata->cols > TWL4030_MAX_COLS) { dev_err(&pdev->dev, "Invalid platform_data\n"); return -EINVAL; From 908433833cac977563e9bef8b206990d846876b4 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Wed, 6 Apr 2011 15:33:16 -0700 Subject: [PATCH 012/200] Input: h3600_ts - fix error handling at connect In case of an error in h3600ts_connect(), deconstruct in correct order and with the right calls. Signed-off-by: Christoph Fritz Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/h3600_ts_input.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/input/touchscreen/h3600_ts_input.c b/drivers/input/touchscreen/h3600_ts_input.c index efa06882de00..45f93d0f5592 100644 --- a/drivers/input/touchscreen/h3600_ts_input.c +++ b/drivers/input/touchscreen/h3600_ts_input.c @@ -399,31 +399,34 @@ static int h3600ts_connect(struct serio *serio, struct serio_driver *drv) IRQF_SHARED | IRQF_DISABLED, "h3600_action", &ts->dev)) { printk(KERN_ERR "h3600ts.c: Could not allocate Action Button IRQ!\n"); err = -EBUSY; - goto fail2; + goto fail1; } if (request_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, npower_button_handler, IRQF_SHARED | IRQF_DISABLED, "h3600_suspend", &ts->dev)) { printk(KERN_ERR "h3600ts.c: Could not allocate Power Button IRQ!\n"); err = -EBUSY; - goto fail3; + goto fail2; } serio_set_drvdata(serio, ts); err = serio_open(serio, drv); if (err) - return err; + goto fail3; //h3600_flite_control(1, 25); /* default brightness */ - input_register_device(ts->dev); + err = input_register_device(ts->dev); + if (err) + goto fail4; return 0; -fail3: free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev); +fail4: serio_close(serio); +fail3: serio_set_drvdata(serio, NULL); + free_irq(IRQ_GPIO_BITSY_NPOWER_BUTTON, ts->dev); fail2: free_irq(IRQ_GPIO_BITSY_ACTION_BUTTON, ts->dev); -fail1: serio_set_drvdata(serio, NULL); - input_free_device(input_dev); +fail1: input_free_device(input_dev); kfree(ts); return err; } From 67c1b8c6aa354aad14aad85d36508fd73d1c6361 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 7 Apr 2011 09:39:44 +0800 Subject: [PATCH 013/200] RTC: rtc-mrst: follow on to the change of rtc_device_register() commit f44f7f96a20 (RTC: Initialize kernel state from RTC) will call rtc_read_alarm() inside rtc_device_register(), so rtc-mrst driver need to call dev_set_drvdata() before rtc_device_register() get called. Cc: Alessandro Zummo Cc: John Stultz Cc: Thomas Gleixner Signed-off-by: Feng Tang Signed-off-by: John Stultz --- drivers/rtc/rtc-mrst.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index b86bc328463b..d2c397c369fc 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -342,6 +342,8 @@ vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq) mrst_rtc.irq = rtc_irq; mrst_rtc.iomem = iomem; + mrst_rtc.dev = dev; + dev_set_drvdata(dev, &mrst_rtc); mrst_rtc.rtc = rtc_device_register(driver_name, dev, &mrst_rtc_ops, THIS_MODULE); @@ -350,8 +352,6 @@ vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq) goto cleanup0; } - mrst_rtc.dev = dev; - dev_set_drvdata(dev, &mrst_rtc); rename_region(iomem, dev_name(&mrst_rtc.rtc->dev)); spin_lock_irq(&rtc_lock); @@ -376,9 +376,10 @@ vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq) return 0; cleanup1: - mrst_rtc.dev = NULL; rtc_device_unregister(mrst_rtc.rtc); cleanup0: + dev_set_drvdata(dev, NULL); + mrst_rtc.dev = NULL; release_region(iomem->start, iomem->end + 1 - iomem->start); dev_err(dev, "rtc-mrst: unable to initialise\n"); return retval; From 621d26567fd0c222f419e3b5ddf39e529e0fdcb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Apr 2011 12:11:06 +0200 Subject: [PATCH 014/200] perf: Fix a build error with some GCC versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this: util/cgroup.c: In function ‘open_cgroup’: util/cgroup.c:16:16: error: ‘saved_ptr’ may be used uninitialized in this function util/cgroup.c:16:16: note: ‘saved_ptr’ was declared here Apparently newer GCC (4.6) can figure out that this variable is properly initialized - but some versions of GCC (such as 4.5.2) need help. Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 9fea75535221..96bee5c46008 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -13,7 +13,7 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen) { FILE *fp; char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1]; - char *token, *saved_ptr; + char *token, *saved_ptr = NULL; int found = 0; fp = fopen("/proc/mounts", "r"); From be1a12a0dfed06cf1e62e35bf91620dc610a451a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Apr 2011 13:05:22 -0400 Subject: [PATCH 015/200] Btrfs: deal with the case that we run out of space in the cache Currently we don't handle running out of space in the cache, so to fix this we keep track of how far in the cache we are. Then we only dirty the pages if we successfully modify all of them, otherwise if we have an error or run out of space we can just drop them and not worry about the vm writing them out. Thanks, Tested-by Johannes Hirte Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 5 ++ fs/btrfs/file.c | 21 +++---- fs/btrfs/free-space-cache.c | 117 +++++++++++++++++------------------- 3 files changed, 69 insertions(+), 74 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3458b5725540..0d00a07b5b29 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2576,6 +2576,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); +void btrfs_drop_pages(struct page **pages, size_t num_pages); +int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, + struct page **pages, size_t num_pages, + loff_t pos, size_t write_bytes, + struct extent_state **cached); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e621ea54a3fd..75899a01dded 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, /* * unlocks pages after btrfs_file_write is done with them */ -static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) +void btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. */ -static noinline int dirty_and_release_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - size_t write_bytes) +int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, + struct page **pages, size_t num_pages, + loff_t pos, size_t write_bytes, + struct extent_state **cached) { int err = 0; int i; - struct inode *inode = fdentry(file)->d_inode; u64 num_bytes; u64 start_pos; u64 end_of_last_block; @@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root, end_of_last_block = start_pos + num_bytes - 1; err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, - NULL); + cached); if (err) return err; @@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, } if (copied > 0) { - ret = dirty_and_release_pages(root, file, pages, - dirty_pages, pos, - copied); + ret = btrfs_dirty_pages(root, inode, pages, + dirty_pages, pos, copied, + NULL); if (ret) { btrfs_delalloc_release_space(inode, dirty_pages << PAGE_CACHE_SHIFT); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f561c953205b..a3f420def0e9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode; struct rb_node *node; struct list_head *pos, *n; + struct page **pages; struct page *page; struct extent_state *cached_state = NULL; struct btrfs_free_cluster *cluster = NULL; @@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root, u64 start, end, len; u64 bytes = 0; u32 *crc, *checksums; - pgoff_t index = 0, last_index = 0; unsigned long first_page_offset; - int num_checksums; + int index = 0, num_pages = 0; int entries = 0; int bitmaps = 0; int ret = 0; bool next_page = false; + bool out_of_space = false; root = root->fs_info->tree_root; @@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root, return 0; } - last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; filemap_write_and_wait(inode->i_mapping); btrfs_wait_ordered_range(inode, inode->i_size & ~(root->sectorsize - 1), (u64)-1); /* We need a checksum per page. */ - num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; - crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); + crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); if (!crc) { iput(inode); return 0; } + pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); + if (!pages) { + kfree(crc); + iput(inode); + return 0; + } + /* Since the first page has all of our checksums and our generation we * need to calculate the offset into the page that we can start writing * our entries. */ - first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); + first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); /* Get the cluster for this block_group if it exists */ if (!list_empty(&block_group->cluster_list)) @@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, * after find_get_page at this point. Just putting this here so people * know and don't freak out. */ - while (index <= last_index) { + while (index < num_pages) { page = grab_cache_page(inode->i_mapping, index); if (!page) { - pgoff_t i = 0; + int i; - while (i < index) { - page = find_get_page(inode->i_mapping, i); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); - i++; + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); } goto out_free; } + pages[index] = page; index++; } @@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, offset = start_offset; } - page = find_get_page(inode->i_mapping, index); + if (index >= num_pages) { + out_of_space = true; + break; + } + + page = pages[index]; addr = kmap(page); entry = addr + start_offset; @@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, bytes += PAGE_CACHE_SIZE; - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - - /* - * We need to release our reference we got for grab_cache_page, - * except for the first page which will hold our checksums, we - * do that below. - */ - if (index != 0) { - unlock_page(page); - page_cache_release(page); - } - - page_cache_release(page); - index++; } while (node || next_page); @@ -734,6 +728,10 @@ int btrfs_write_out_cache(struct btrfs_root *root, struct btrfs_free_space *entry = list_entry(pos, struct btrfs_free_space, list); + if (index >= num_pages) { + out_of_space = true; + break; + } page = find_get_page(inode->i_mapping, index); addr = kmap(page); @@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, crc++; bytes += PAGE_CACHE_SIZE; - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); list_del_init(&entry->list); index++; } + if (out_of_space) { + btrfs_drop_pages(pages, num_pages); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, &cached_state, + GFP_NOFS); + ret = 0; + goto out_free; + } + /* Zero out the rest of the pages just to make sure */ - while (index <= last_index) { + while (index < num_pages) { void *addr; - page = find_get_page(inode->i_mapping, index); - + page = pages[index]; addr = kmap(page); memset(addr, 0, PAGE_CACHE_SIZE); kunmap(page); - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); bytes += PAGE_CACHE_SIZE; index++; } - btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); - /* Write the checksums and trans id to the first page */ { void *addr; u64 *gen; - page = find_get_page(inode->i_mapping, 0); + page = pages[0]; addr = kmap(page); - memcpy(addr, checksums, sizeof(u32) * num_checksums); - gen = addr + (sizeof(u32) * num_checksums); + memcpy(addr, checksums, sizeof(u32) * num_pages); + gen = addr + (sizeof(u32) * num_pages); *gen = trans->transid; kunmap(page); - ClearPageChecked(page); - set_page_extent_mapped(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - page_cache_release(page); } - BTRFS_I(inode)->generation = trans->transid; + ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, + bytes, &cached_state); + btrfs_drop_pages(pages, num_pages); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state, GFP_NOFS); + if (ret) { + ret = 0; + goto out_free; + } + + BTRFS_I(inode)->generation = trans->transid; + filemap_write_and_wait(inode->i_mapping); key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -853,6 +845,7 @@ out_free: BTRFS_I(inode)->generation = 0; } kfree(checksums); + kfree(pages); btrfs_update_inode(trans, root, inode); iput(inode); return ret; From 06d5a5899d6d3ac401d2359b5eac6d2a3a0fe331 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Apr 2011 11:57:27 -0400 Subject: [PATCH 016/200] Btrfs: only retry transaction reservation once I saw a lockup where we kept getting into this start transaction->commit transaction loop because of enospce. The fact is if we fail to make our reservation, we've tried _everything_ several times, so we only need to try and commit the transaction once, and if that doesn't work then we really are out of space and need to just exit. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5b158da7e0bb..4583008217e6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -181,6 +181,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, { struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; + int retries = 0; int ret; if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) @@ -224,10 +225,18 @@ again: if (num_items > 0) { ret = btrfs_trans_reserve_metadata(h, root, num_items); - if (ret == -EAGAIN) { + if (ret == -EAGAIN && !retries) { + retries++; btrfs_commit_transaction(h, root); goto again; + } else if (ret == -EAGAIN) { + /* + * We have already retried and got EAGAIN, so really we + * don't have space, so set ret to -ENOSPC. + */ + ret = -ENOSPC; } + if (ret < 0) { btrfs_end_transaction(h, root); return ERR_PTR(ret); From 12ddb96cb6752218d8a1aeb696ec9b0ca7adb42f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Apr 2011 13:02:27 -0400 Subject: [PATCH 017/200] Btrfs: map the inode item when doing fill_inode_item Instead of calling kmap_atomic for every thing we set in the inode item, map the entire inode item at the start and unmap it at the end. This makes a sequential dd of 400mb O_DIRECT something like 1% faster. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cc6022842e0c..da2680263d9f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2589,6 +2589,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { + if (!leaf->map_token) + map_private_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_inode_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); @@ -2617,6 +2624,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } } /* From 1ef30be142d2cc60e2687ef267de864cf31be995 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Apr 2011 19:25:36 -0400 Subject: [PATCH 018/200] Btrfs: do not call btrfs_update_inode in endio if nothing changed In the DIO code we often don't update the i_disk_size because the i_size isn't updated until after the DIO is completed, so basically we are allocating a path, doing a search, and updating the inode item for no reason since nothing changed. btrfs_ordered_update_i_size will return 1 if it didn't update i_disk_size, so only run btrfs_update_inode if btrfs_ordered_update_i_size returns 0. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da2680263d9f..4a238d676e5b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1769,9 +1769,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - btrfs_ordered_update_i_size(inode, 0, ordered_extent); - ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (!ret) { + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + } + ret = 0; out: if (nolock) { if (trans) @@ -5865,8 +5868,10 @@ again: } add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); - btrfs_ordered_update_i_size(inode, 0, ordered); - btrfs_update_inode(trans, root, inode); + ret = btrfs_ordered_update_i_size(inode, 0, ordered); + if (!ret) + btrfs_update_inode(trans, root, inode); + ret = 0; out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, ordered->file_offset + ordered->len - 1, From 02f57c7aedef1a537f4b16db7061cdd8efa3bb4e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Apr 2011 14:25:44 -0400 Subject: [PATCH 019/200] Btrfs: don't split dio bios if we don't have to We have been unconditionally allocating a new bio and re-adding all pages from our original bio to the new bio. This is needed if our original bio is larger than our stripe size, but if it is smaller than the stripe size then there is no need to do this. So check the map length and if we are under that then go ahead and submit the original bio. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4a238d676e5b..149c77fd1eb5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6006,13 +6006,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, int ret = 0; int write = rw & REQ_WRITE; - bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); - if (!bio) - return -ENOMEM; - bio->bi_private = dip; - bio->bi_end_io = btrfs_end_dio_bio; - atomic_inc(&dip->pending_bios); - map_length = orig_bio->bi_size; ret = btrfs_map_block(map_tree, READ, start_sector << 9, &map_length, NULL, 0); @@ -6021,6 +6014,18 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, return -EIO; } + if (map_length >= orig_bio->bi_size) { + bio = orig_bio; + goto submit; + } + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); + if (!bio) + return -ENOMEM; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + atomic_inc(&dip->pending_bios); + while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { if (unlikely(map_length < submit_len + bvec->bv_len || bio_add_page(bio, bvec->bv_page, bvec->bv_len, @@ -6071,6 +6076,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, } } +submit: ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, csums); if (!ret) From 1ae399382512b3e4d6c923e53da9e45935577040 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Apr 2011 14:41:34 -0400 Subject: [PATCH 020/200] Btrfs: do not use async submit for small DIO io's When looking at our DIO performance Chris said that for small IO's doing the async submit stuff tends to be more overhead than it's worth. With this on top of my other fixes I get about a 17-20% speedup doing a sequential dd with 4k IO's. Basically if we don't have to split the bio for the map length it's small enough to be directly submitted, otherwise go back to the async submit. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 149c77fd1eb5..2bb76c6157a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5957,7 +5957,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, - u32 *csums) + u32 *csums, int async_submit) { int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -5968,13 +5968,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; - if (write && !skip_sum) { + if (skip_sum) + goto map; + + if (write && async_submit) { ret = btrfs_wq_submit_bio(root->fs_info, inode, rw, bio, 0, 0, file_offset, __btrfs_submit_bio_start_direct_io, __btrfs_submit_bio_done); goto err; + } else if (write) { + /* + * If we aren't doing async submit, calculate the csum of the + * bio now. + */ + ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); + if (ret) + goto err; } else if (!skip_sum) { ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset, csums); @@ -5982,7 +5993,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, goto err; } - ret = btrfs_map_bio(root, rw, bio, 0, 1); +map: + ret = btrfs_map_bio(root, rw, bio, 0, async_submit); err: bio_put(bio); return ret; @@ -6004,6 +6016,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, int nr_pages = 0; u32 *csums = dip->csums; int ret = 0; + int async_submit = 0; int write = rw & REQ_WRITE; map_length = orig_bio->bi_size; @@ -6019,6 +6032,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, goto submit; } + async_submit = 1; bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); if (!bio) return -ENOMEM; @@ -6039,7 +6053,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, atomic_inc(&dip->pending_bios); ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums); + csums, async_submit); if (ret) { bio_put(bio); atomic_dec(&dip->pending_bios); @@ -6078,7 +6092,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, submit: ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, - csums); + csums, async_submit); if (!ret) return 0; From 16d299ac7446b5a75c5683a9ae11d7907d444c86 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Apr 2011 14:53:07 -0400 Subject: [PATCH 021/200] Btrfs: reuse the extent_map we found when calling btrfs_get_extent In btrfs_get_block_direct we call btrfs_get_extent to lookup the extent for the range that we are looking for. If we don't find an extent, btrfs_get_extent will insert a extent_map for that area and mark it as a hole. So it does the job of allocating a new extent map and inserting it into the io tree. But if we're creating a new extent we free it up and redo all of that work. So instead pass the em to btrfs_new_extent_direct(), and if it will work just allocate the disk space and set it up properly and bypass the freeing/allocating of a new extent map and the expensive operation of inserting the thing into the io_tree. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bb76c6157a4..24310c9cb14f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5445,17 +5445,30 @@ out: } static struct extent_map *btrfs_new_extent_direct(struct inode *inode, + struct extent_map *em, u64 start, u64 len) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_key ins; u64 alloc_hint; int ret; + bool insert = false; - btrfs_drop_extent_cache(inode, start, start + len - 1, 0); + /* + * Ok if the extent map we looked up is a hole and is for the exact + * range we want, there is no reason to allocate a new one, however if + * it is not right then we need to free this one and drop the cache for + * our range. + */ + if (em->block_start != EXTENT_MAP_HOLE || em->start != start || + em->len != len) { + free_extent_map(em); + em = NULL; + insert = true; + btrfs_drop_extent_cache(inode, start, start + len - 1, 0); + } trans = btrfs_join_transaction(root, 0); if (IS_ERR(trans)) @@ -5471,10 +5484,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, goto out; } - em = alloc_extent_map(GFP_NOFS); if (!em) { - em = ERR_PTR(-ENOMEM); - goto out; + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } } em->start = start; @@ -5484,9 +5499,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, em->block_start = ins.objectid; em->block_len = ins.offset; em->bdev = root->fs_info->fs_devices->latest_bdev; + + /* + * We need to do this because if we're using the original em we searched + * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that. + */ + em->flags = 0; set_bit(EXTENT_FLAG_PINNED, &em->flags); - while (1) { + while (insert) { write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); @@ -5704,8 +5725,7 @@ must_cow: * it above */ len = bh_result->b_size; - free_extent_map(em); - em = btrfs_new_extent_direct(inode, start, len); + em = btrfs_new_extent_direct(inode, em, start, len); if (IS_ERR(em)) return PTR_ERR(em); len = min(len, em->len - (start - em->start)); From 93a54bc4c28a125978cddbe2db9e347391e3522d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Apr 2011 15:11:44 -0400 Subject: [PATCH 022/200] Btrfs: check for duplicate iov_base's when doing dio reads Apparently it is ok to submit a read to an IDE device with the same target page for different offsets. This is what Windows does under qemu. The problem is under DIO we expect them to be different buffers for checksumming reasons, and so this sort of thing will result in checksum errors, when in reality the file is fine. So when reading, check to make sure that all iov bases are different, and if they aren't fall back to buffered mode, since that will work out right. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 24310c9cb14f..00d59c6a9769 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6207,6 +6207,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io unsigned long nr_segs) { int seg; + int i; size_t size; unsigned long addr; unsigned blocksize_mask = root->sectorsize - 1; @@ -6221,8 +6222,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io addr = (unsigned long)iov[seg].iov_base; size = iov[seg].iov_len; end += size; - if ((addr & blocksize_mask) || (size & blocksize_mask)) + if ((addr & blocksize_mask) || (size & blocksize_mask)) goto out; + + /* If this is a write we don't need to check anymore */ + if (rw & WRITE) + continue; + + /* + * Check to make sure we don't have duplicate iov_base's in this + * iovec, if so return EINVAL, otherwise we'll get csum errors + * when reading back. + */ + for (i = seg + 1; i < nr_segs; i++) { + if (iov[seg].iov_base == iov[i].iov_base) + goto out; + } } retval = 0; out: From e89c0d7090c54d7b11b9b091e495a1ae345dd3ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Apr 2011 13:57:53 +0100 Subject: [PATCH 023/200] ARM: 6864/1: hw_breakpoint: clear DBGVCR out of reset The DBGVCR, used for configuring vector catch debug events, is UNKNOWN out of reset on ARMv7. When enabling monitor mode, this must be zeroed to avoid UNPREDICTABLE behaviour. This patch adds the zeroing code to the debug reset path. Cc: stable Reported-by: Stepan Moskovchenko Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/hw_breakpoint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 8dbc126f7152..87acc25d7a3e 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -868,6 +868,13 @@ static void reset_ctrl_regs(void *info) */ asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); isb(); + + /* + * Clear any configured vector-catch events before + * enabling monitor mode. + */ + asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); + isb(); } if (enable_monitor_mode()) From 6759788b944139793bffa889761cc3d8d703fdc0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Apr 2011 14:01:24 +0100 Subject: [PATCH 024/200] ARM: 6865/1: perf: ensure pass through zero is counted on overflow Commit a737823d ("ARM: perf: ensure overflows aren't missed due to IRQ latency") changed the way that event deltas are calculated on overflow so that we don't miss events when the new count value overtakes the previous one. Unfortunately, we forget to count the event that passes through zero so we end up being off by 1. This patch adds on the correction. Reported-by: Chris Moore Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 69cfee0fe00f..979da3947f42 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -221,7 +221,7 @@ again: prev_raw_count &= armpmu->max_period; if (overflow) - delta = armpmu->max_period - prev_raw_count + new_raw_count; + delta = armpmu->max_period - prev_raw_count + new_raw_count + 1; else delta = new_raw_count - prev_raw_count; From 974508262e94b567f9d5b7ba1eef9fc493561f63 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Apr 2011 16:14:25 +0100 Subject: [PATCH 025/200] ARM: 6866/1: Do not restrict HIGHPTE to !OUTER_CACHE The HIGHPTE config option depends on !OUTER_CACHE. However, there is no set_pte_ext() function that does outer cache maintenance by physical address, hence no need for such restriction. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9954c9bef679..e8cbecc60fbe 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1540,7 +1540,6 @@ config HIGHMEM config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM - depends on !OUTER_CACHE config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" From 2e82669acf03e5bf2080f5d3ef005168e67d8a51 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Apr 2011 16:16:29 +0100 Subject: [PATCH 026/200] ARM: 6867/1: Introduce THREAD_NOTIFY_COPY for copy_thread() hooks This patch adds THREAD_NOTIFY_COPY for calling registered handlers during the copy_thread() function call. It also changes the VFP handler to use a switch statement rather than if..else and ignore this event. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/thread_notify.h | 1 + arch/arm/kernel/process.c | 2 ++ arch/arm/vfp/vfpmodule.c | 22 +++++++++++++++------- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/thread_notify.h b/arch/arm/include/asm/thread_notify.h index c4391ba20350..1dc980675894 100644 --- a/arch/arm/include/asm/thread_notify.h +++ b/arch/arm/include/asm/thread_notify.h @@ -43,6 +43,7 @@ static inline void thread_notify(unsigned long rc, struct thread_info *thread) #define THREAD_NOTIFY_FLUSH 0 #define THREAD_NOTIFY_EXIT 1 #define THREAD_NOTIFY_SWITCH 2 +#define THREAD_NOTIFY_COPY 3 #endif #endif diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 94bbedbed639..5e1e54197227 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -372,6 +372,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + thread_notify(THREAD_NOTIFY_COPY, thread); + return 0; } diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index bbf3da012afd..1c88bbdfd34d 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -104,12 +104,17 @@ static void vfp_thread_exit(struct thread_info *thread) static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v) { struct thread_info *thread = v; + u32 fpexc; +#ifdef CONFIG_SMP + unsigned int cpu; +#endif - if (likely(cmd == THREAD_NOTIFY_SWITCH)) { - u32 fpexc = fmrx(FPEXC); + switch (cmd) { + case THREAD_NOTIFY_SWITCH: + fpexc = fmrx(FPEXC); #ifdef CONFIG_SMP - unsigned int cpu = thread->cpu; + cpu = thread->cpu; /* * On SMP, if VFP is enabled, save the old state in @@ -134,13 +139,16 @@ static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v) * old state. */ fmxr(FPEXC, fpexc & ~FPEXC_EN); - return NOTIFY_DONE; - } + break; - if (cmd == THREAD_NOTIFY_FLUSH) + case THREAD_NOTIFY_FLUSH: vfp_thread_flush(thread); - else + break; + + case THREAD_NOTIFY_EXIT: vfp_thread_exit(thread); + break; + } return NOTIFY_DONE; } From c98c09773d80db93cae349f0496fef109feab54d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Apr 2011 16:17:17 +0100 Subject: [PATCH 027/200] ARM: 6868/1: Preserve the VFP state during fork VFP registers d16-d31 are callee saved registers and must be preserved during function calls, including fork(). The VFP configuration should also be preserved. The patch copies the full VFP state to the child process. Reported-by: Paul Wright Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/vfp/vfpmodule.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 1c88bbdfd34d..f74695075e64 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -78,6 +78,14 @@ static void vfp_thread_exit(struct thread_info *thread) put_cpu(); } +static void vfp_thread_copy(struct thread_info *thread) +{ + struct thread_info *parent = current_thread_info(); + + vfp_sync_hwstate(parent); + thread->vfpstate = parent->vfpstate; +} + /* * When this function is called with the following 'cmd's, the following * is true while this function is being run: @@ -148,6 +156,10 @@ static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v) case THREAD_NOTIFY_EXIT: vfp_thread_exit(thread); break; + + case THREAD_NOTIFY_COPY: + vfp_thread_copy(thread); + break; } return NOTIFY_DONE; From aec995900fbc8cffa9f0f9e797ef07a0beb2b079 Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Fri, 8 Apr 2011 16:46:12 +0100 Subject: [PATCH 028/200] ARM: 6872/1: arch:common:Makefile Remove unused config in the Makefile. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch below removes an unused config variable found by using a kernel cleanup script. Signed-off-by: Justin P. Mattock Acked-by: Uwe Kleine-König Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Russell King --- arch/arm/common/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index e7521bca2c35..6ea9b6f3607a 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -16,5 +16,4 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_ARCH_IXP2000) += uengine.o obj-$(CONFIG_ARCH_IXP23XX) += uengine.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o -obj-$(CONFIG_COMMON_CLKDEV) += clkdev.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o From e566b76ed30768140df8f0023904aed5a41244f7 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 6 Apr 2011 02:54:54 +0200 Subject: [PATCH 029/200] perf_event: Fix cgrp event scheduling bug in perf_enable_on_exec() There is a bug in perf_event_enable_on_exec() when cgroup events are active on a CPU: the cgroup events may be scheduled twice causing event state corruptions which eventually may lead to kernel panics. The reason is that the function needs to first schedule out the cgroup events, just like for the per-thread events. The cgroup event are scheduled back in automatically from the perf_event_context_sched_in() function. The patch also adds a WARN_ON_ONCE() is perf_cgroup_switch() to catch any bogus state. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110406005454.GA1062@quad Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 27960f114efd..8e81a9860a0d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -364,6 +364,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode) } if (mode & PERF_CGROUP_SWIN) { + WARN_ON_ONCE(cpuctx->cgrp); /* set cgrp before ctxsw in to * allow event_filter_match() to not * have to pass task around @@ -2423,6 +2424,14 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) if (!ctx || !ctx->nr_events) goto out; + /* + * We must ctxsw out cgroup events to avoid conflict + * when invoking perf_task_event_sched_in() later on + * in this function. Otherwise we end up trying to + * ctxswin cgroup events which are already scheduled + * in. + */ + perf_cgroup_sched_out(current); task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); @@ -2447,6 +2456,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); + /* + * Also calls ctxswin for cgroup events, if any: + */ perf_event_context_sched_in(ctx, ctx->task); out: local_irq_restore(flags); From 109b81296c63228578d4760794d8dd46e02eddfb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Apr 2011 14:13:10 +0200 Subject: [PATCH 030/200] block: splice plug list to local context If the request_fn ends up blocking, we could be re-entering the plug flush. Since the list is protected by explicitly not allowing schedule events, this isn't a terribly good idea. Additionally, it can cause us to recurse. As request_fn called by __blk_run_queue is allowed to 'schedule()' (after dropping the queue lock of course), it is possible to get a recursive call: schedule -> blk_flush_plug -> __blk_finish_plug -> flush_plug_list -> __blk_run_queue -> request_fn -> schedule We must make sure that the second schedule does not call into blk_flush_plug again. So instead of leaving the list of requests on blk_plug->list, move them to a separate list leaving blk_plug->list empty. Signed-off-by: Jens Axboe --- block/blk-core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 90f22cc30799..eeaca0998df5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2673,19 +2673,24 @@ static void flush_plug_list(struct blk_plug *plug) struct request_queue *q; unsigned long flags; struct request *rq; + LIST_HEAD(list); BUG_ON(plug->magic != PLUG_MAGIC); if (list_empty(&plug->list)) return; - if (plug->should_sort) - list_sort(NULL, &plug->list, plug_rq_cmp); + list_splice_init(&plug->list, &list); + + if (plug->should_sort) { + list_sort(NULL, &list, plug_rq_cmp); + plug->should_sort = 0; + } q = NULL; local_irq_save(flags); - while (!list_empty(&plug->list)) { - rq = list_entry_rq(plug->list.next); + while (!list_empty(&list)) { + rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); BUG_ON(!rq->q); @@ -2713,7 +2718,6 @@ static void flush_plug_list(struct blk_plug *plug) spin_unlock(q->queue_lock); } - BUG_ON(!list_empty(&plug->list)); local_irq_restore(flags); } From 30d746c68025ee69ac17219aacc9b1614d951f01 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 7 Apr 2011 14:13:15 +0200 Subject: [PATCH 031/200] x86/ce4100: Add reg property to bridges without the reg property Ben's new code won't find the PCI & ISA bridge and the devices won't get the DT-node attached. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Thomas Gleixner Cc: davem@davemloft.net Cc: monstr@monstr.eu Cc: Benjamin Herrenschmidt Link: http://lkml.kernel.org/r/20110407121315.GA9204@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/platform/ce4100/falconfalls.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts index dc701ea58546..2d6d226f2b10 100644 --- a/arch/x86/platform/ce4100/falconfalls.dts +++ b/arch/x86/platform/ce4100/falconfalls.dts @@ -74,6 +74,7 @@ compatible = "intel,ce4100-pci", "pci"; device_type = "pci"; bus-range = <1 1>; + reg = <0x0800 0x0 0x0 0x0 0x0>; ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; interrupt-parent = <&ioapic2>; @@ -412,6 +413,7 @@ #address-cells = <2>; #size-cells = <1>; compatible = "isa"; + reg = <0xf800 0x0 0x0 0x0 0x0>; ranges = <1 0 0 0 0 0x100>; rtc@70 { From 8fb27640d0e2b43c5584bf0087431b7b8d3c319a Mon Sep 17 00:00:00 2001 From: Yoshinori Sano Date: Sat, 9 Apr 2011 02:30:07 +0000 Subject: [PATCH 032/200] Btrfs: fix memory leaks in btrfs_new_inode() This patch fixes memory leaks in btrfs_new_inode(). Signed-off-by: Yoshinori Sano Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cc6022842e0c..2d1208f964eb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4526,14 +4526,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); inode = new_inode(root->fs_info->sb); - if (!inode) + if (!inode) { + btrfs_free_path(path); return ERR_PTR(-ENOMEM); + } if (dir) { trace_btrfs_inode_request(dir); ret = btrfs_set_inode_index(dir, index); if (ret) { + btrfs_free_path(path); iput(inode); return ERR_PTR(ret); } From 3387206f26e1b48703e810175b98611a4fd8e8ea Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 11 Apr 2011 21:52:52 +0000 Subject: [PATCH 033/200] btrfs: properly handle overlapping areas in memmove_extent_buffer Fix data corruption caused by memcpy() usage on overlapping data. I've observed it first when found out usermode linux crash on btrfs. ?all chain is the following: ------------[ cut here ]------------ WARNING: at /home/slyfox/linux-2.6/fs/btrfs/extent_io.c:3900 memcpy_extent_buffer+0x1a5/0x219() Call Trace: 6fa39a58: [<601b495e>] _raw_spin_unlock_irqrestore+0x18/0x1c 6fa39a68: [<60029ad9>] warn_slowpath_common+0x59/0x70 6fa39aa8: [<60029b05>] warn_slowpath_null+0x15/0x17 6fa39ab8: [<600efc97>] memcpy_extent_buffer+0x1a5/0x219 6fa39b48: [<600efd9f>] memmove_extent_buffer+0x94/0x208 6fa39bc8: [<600becbf>] btrfs_del_items+0x214/0x473 6fa39c78: [<600ce1b0>] btrfs_delete_one_dir_name+0x7c/0xda 6fa39cc8: [<600dad6b>] __btrfs_unlink_inode+0xad/0x25d 6fa39d08: [<600d7864>] btrfs_start_transaction+0xe/0x10 6fa39d48: [<600dc9ff>] btrfs_unlink_inode+0x1b/0x3b 6fa39d78: [<600e04bc>] btrfs_unlink+0x70/0xef 6fa39dc8: [<6007f0d0>] vfs_unlink+0x58/0xa3 6fa39df8: [<60080278>] do_unlinkat+0xd4/0x162 6fa39e48: [<600517db>] call_rcu_sched+0xe/0x10 6fa39e58: [<600452a8>] __put_cred+0x58/0x5a 6fa39e78: [<6007446c>] sys_faccessat+0x154/0x166 6fa39ed8: [<60080317>] sys_unlink+0x11/0x13 6fa39ee8: [<60016b80>] handle_syscall+0x58/0x70 6fa39f08: [<60021377>] userspace+0x2d4/0x381 6fa39fc8: [<60014507>] fork_handler+0x62/0x69 ---[ end trace 70b0ca2ef0266b93 ]--- http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg09302.html Signed-off-by: Sergei Trofimovich Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 77c65a0bea34..864e0496cc1c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3885,6 +3885,12 @@ static void move_pages(struct page *dst_page, struct page *src_page, kunmap_atomic(dst_kaddr, KM_USER0); } +static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) +{ + unsigned long distance = (src > dst) ? src - dst : dst - src; + return distance < len; +} + static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) @@ -3892,10 +3898,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page, char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *src_kaddr; - if (dst_page != src_page) + if (dst_page != src_page) { src_kaddr = kmap_atomic(src_page, KM_USER1); - else + } else { src_kaddr = dst_kaddr; + BUG_ON(areas_overlap(src_off, dst_off, len)); + } memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); kunmap_atomic(dst_kaddr, KM_USER0); @@ -3970,7 +3978,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, "len %lu len %lu\n", dst_offset, len, dst->len); BUG_ON(1); } - if (dst_offset < src_offset) { + if (!areas_overlap(src_offset, dst_offset, len)) { memcpy_extent_buffer(dst, dst_offset, src_offset, len); return; } From a1b75f7d961955e697ec377f90115e3517df98f9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Apr 2011 15:51:18 +0000 Subject: [PATCH 034/200] Btrfs: check for duplicate iov_base's when doing dio reads Apparently it is ok to submit a read to an IDE device with the same target page for different offsets. This is what Windows does under qemu. The problem is under DIO we expect them to be different buffers for checksumming reasons, and so this sort of thing will result in checksum errors, when in reality the file is fine. So when reading, check to make sure that all iov bases are different, and if they aren't fall back to buffered mode, since that will work out right. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2d1208f964eb..edafc28883af 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6153,6 +6153,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io unsigned long nr_segs) { int seg; + int i; size_t size; unsigned long addr; unsigned blocksize_mask = root->sectorsize - 1; @@ -6167,8 +6168,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io addr = (unsigned long)iov[seg].iov_base; size = iov[seg].iov_len; end += size; - if ((addr & blocksize_mask) || (size & blocksize_mask)) + if ((addr & blocksize_mask) || (size & blocksize_mask)) goto out; + + /* If this is a write we don't need to check anymore */ + if (rw & WRITE) + continue; + + /* + * Check to make sure we don't have duplicate iov_base's in this + * iovec, if so return EINVAL, otherwise we'll get csum errors + * when reading back. + */ + for (i = seg + 1; i < nr_segs; i++) { + if (iov[seg].iov_base == iov[i].iov_base) + goto out; + } } retval = 0; out: From 13f2696f1da9700d401db0ac2bc27ebc17068b22 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Mon, 11 Apr 2011 15:56:31 +0000 Subject: [PATCH 035/200] fix user annotation in ioctl.c Fix address space annotation correct in ioctl.c. Signed-off-by: Daniel J Blueman BTRFS_BLOCK_GROUP_SYSTEM, @@ -2387,7 +2387,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) up_read(&info->groups_sem); } - user_dest = (struct btrfs_ioctl_space_info *) + user_dest = (struct btrfs_ioctl_space_info __user *) (arg + sizeof(struct btrfs_ioctl_space_args)); if (copy_to_user(user_dest, dest_orig, alloc_size)) Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f9c93a9ed4a7..f580a3a5d2fc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; struct btrfs_ioctl_space_info *dest_orig; - struct btrfs_ioctl_space_info *user_dest; + struct btrfs_ioctl_space_info __user *user_dest; struct btrfs_space_info *info; u64 types[] = {BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_SYSTEM, From e15d0542426f063dc53b4c51bdfc11e0bbe4d298 Mon Sep 17 00:00:00 2001 From: Xin Zhong Date: Wed, 6 Apr 2011 07:33:51 +0000 Subject: [PATCH 036/200] Btrfs: fix subvolume mount by name problem when default mount subvolume is set We create two subvolumes (meego_root and meego_home) in btrfs root directory. And set meego_root as default mount subvolume. After we remount btrfs, meego_root is mounted to top directory by default. Then when we try to mount meego_home (subvol=meego_home) to a subdirectory, it failed. The problem is when default mount subvolume is set to meego_root, we search meego_home in meego_root but can not find it. So the solution is to add a new mount option (subvolrootid) to specify subvol id of root and search subvol name in it. For our case, now we can use "-o subvolrootid=0,subvol=meego_home) to mount meego_home. Detail information can be found in meego bugzilla: https://bugs.meego.com/show_bug.cgi?id=15055 Signed-off-by: Zhong, Xin Signed-off-by: Chris Mason --- fs/btrfs/super.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 58e7de9cc90c..0ac712efcdf2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -159,7 +159,7 @@ enum { Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, - Opt_enospc_debug, Opt_err, + Opt_enospc_debug, Opt_subvolrootid, Opt_err, }; static match_table_t tokens = { @@ -189,6 +189,7 @@ static match_table_t tokens = { {Opt_clear_cache, "clear_cache"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, {Opt_enospc_debug, "enospc_debug"}, + {Opt_subvolrootid, "subvolrootid=%d"}, {Opt_err, NULL}, }; @@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) break; case Opt_subvol: case Opt_subvolid: + case Opt_subvolrootid: case Opt_device: /* * These are parsed by btrfs_parse_early_options @@ -388,7 +390,7 @@ out: */ static int btrfs_parse_early_options(const char *options, fmode_t flags, void *holder, char **subvol_name, u64 *subvol_objectid, - struct btrfs_fs_devices **fs_devices) + u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *opts, *orig, *p; @@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, *subvol_objectid = intarg; } break; + case Opt_subvolrootid: + intarg = 0; + error = match_int(&args[0], &intarg); + if (!error) { + /* we want the original fs_tree */ + if (!intarg) + *subvol_rootid = + BTRFS_FS_TREE_OBJECTID; + else + *subvol_rootid = intarg; + } + break; case Opt_device: error = btrfs_scan_one_device(match_strdup(&args[0]), flags, holder, fs_devices); @@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, fmode_t mode = FMODE_READ; char *subvol_name = NULL; u64 subvol_objectid = 0; + u64 subvol_rootid = 0; int error = 0; if (!(flags & MS_RDONLY)) @@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, error = btrfs_parse_early_options(data, mode, fs_type, &subvol_name, &subvol_objectid, - &fs_devices); + &subvol_rootid, &fs_devices); if (error) return ERR_PTR(error); @@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } - root = get_default_root(s, subvol_objectid); - if (IS_ERR(root)) { - error = PTR_ERR(root); - deactivate_locked_super(s); - goto error_free_subvol_name; - } /* if they gave us a subvolume name bind mount into that */ if (strcmp(subvol_name, ".")) { struct dentry *new_root; + + root = get_default_root(s, subvol_rootid); + if (IS_ERR(root)) { + error = PTR_ERR(root); + deactivate_locked_super(s); + goto error_free_subvol_name; + } + mutex_lock(&root->d_inode->i_mutex); new_root = lookup_one_len(subvol_name, root, strlen(subvol_name)); @@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } dput(root); root = new_root; + } else { + root = get_default_root(s, subvol_objectid); + if (IS_ERR(root)) { + error = PTR_ERR(root); + deactivate_locked_super(s); + goto error_free_subvol_name; + } } kfree(subvol_name); From 13c5a93e7005d7dae0b6d070d25203593e692d13 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 15:45:29 -0400 Subject: [PATCH 037/200] Btrfs: avoid taking the trans_mutex in btrfs_end_transaction I've been working on making our O_DIRECT latency not suck and I noticed we were taking the trans_mutex in btrfs_end_transaction. So to do this we convert num_writers and use_count to atomic_t's and just decrement them in btrfs_end_transaction. Instead of deleting the transaction from the trans list in put_transaction we do that in btrfs_commit_transaction() since that's the only time it actually needs to be removed from the list. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/transaction.c | 37 ++++++++++++++++--------------------- fs/btrfs/transaction.h | 4 ++-- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a272bfd74ea0..ef6865c17cd6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3136,7 +3136,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); - t->use_count = 0; + atomic_set(&t->use_count, 0); list_del_init(&t->list); memset(t, 0, sizeof(*t)); kmem_cache_free(btrfs_transaction_cachep, t); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4583008217e6..c571734d5e5a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -32,10 +32,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) { - WARN_ON(transaction->use_count == 0); - transaction->use_count--; - if (transaction->use_count == 0) { - list_del_init(&transaction->list); + WARN_ON(atomic_read(&transaction->use_count) == 0); + if (atomic_dec_and_test(&transaction->use_count)) { memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root) if (!cur_trans) return -ENOMEM; root->fs_info->generation++; - cur_trans->num_writers = 1; + atomic_set(&cur_trans->num_writers, 1); cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; cur_trans->blocked = 0; - cur_trans->use_count = 1; + atomic_set(&cur_trans->use_count, 1); cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); @@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root) root->fs_info->running_transaction = cur_trans; spin_unlock(&root->fs_info->new_trans_lock); } else { - cur_trans->num_writers++; + atomic_inc(&cur_trans->num_writers); cur_trans->num_joined++; } @@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root) cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); - cur_trans->use_count++; + atomic_inc(&cur_trans->use_count); while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -205,7 +203,7 @@ again: } cur_trans = root->fs_info->running_transaction; - cur_trans->use_count++; + atomic_inc(&cur_trans->use_count); if (type != TRANS_JOIN_NOLOCK) mutex_unlock(&root->fs_info->trans_mutex); @@ -336,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) goto out_unlock; /* nothing committing|committed */ } - cur_trans->use_count++; + atomic_inc(&cur_trans->use_count); mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, cur_trans); @@ -466,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up_process(info->transaction_kthread); } - if (lock) - mutex_lock(&info->trans_mutex); WARN_ON(cur_trans != info->running_transaction); - WARN_ON(cur_trans->num_writers < 1); - cur_trans->num_writers--; + WARN_ON(atomic_read(&cur_trans->num_writers) < 1); + atomic_dec(&cur_trans->num_writers); smp_mb(); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); put_transaction(cur_trans); - if (lock) - mutex_unlock(&info->trans_mutex); if (current->journal_info == trans) current->journal_info = NULL; @@ -1187,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, /* take transaction reference */ mutex_lock(&root->fs_info->trans_mutex); cur_trans = trans->transaction; - cur_trans->use_count++; + atomic_inc(&cur_trans->use_count); mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); @@ -1246,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (cur_trans->in_commit) { - cur_trans->use_count++; + atomic_inc(&cur_trans->use_count); mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); @@ -1268,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); if (!prev_trans->commit_done) { - prev_trans->use_count++; + atomic_inc(&prev_trans->use_count); mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, prev_trans); @@ -1309,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, TASK_UNINTERRUPTIBLE); smp_mb(); - if (cur_trans->num_writers > 1) + if (atomic_read(&cur_trans->num_writers) > 1) schedule_timeout(MAX_SCHEDULE_TIMEOUT); else if (should_grow) schedule_timeout(1); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); - } while (cur_trans->num_writers > 1 || + } while (atomic_read(&cur_trans->num_writers) > 1 || (should_grow && cur_trans->num_joined != joined)); ret = create_pending_snapshots(trans, root->fs_info); @@ -1403,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->commit_wait); + list_del_init(&cur_trans->list); put_transaction(cur_trans); put_transaction(cur_trans); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 229a594cacd5..e441acc6c584 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -27,11 +27,11 @@ struct btrfs_transaction { * total writers in this transaction, it must be zero before the * transaction can end */ - unsigned long num_writers; + atomic_t num_writers; unsigned long num_joined; int in_commit; - int use_count; + atomic_t use_count; int commit_done; int blocked; struct list_head list; From 507903b81840a70cc6a179d4eb03584ad50e8c5b Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 6 Apr 2011 10:02:20 +0000 Subject: [PATCH 038/200] btrfs: using cached extent_state in set/unlock combinations In several places the sequence (set_extent_uptodate, unlock_extent) is used. This leads to a duplicate lookup of the extent state. This patch lets set_extent_uptodate return a cached extent_state which can be passed to unlock_extent_cached. The occurences of the above sequences are updated to use the cache. Only end_bio_extent_readpage is updated that it first gets a cached state to pass it to the readpage_end_io_hook as the prototype requested and is later on being used for set/unlock. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 70 +++++++++++++++++++++++++++++++++----------- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 2 +- 3 files changed, 55 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 864e0496cc1c..8dcfb77678de 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -690,6 +690,17 @@ static void cache_state(struct extent_state *state, } } +static void uncache_state(struct extent_state **cached_ptr) +{ + if (cached_ptr && (*cached_ptr)) { + struct extent_state *state = *cached_ptr; + if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { + *cached_ptr = NULL; + free_extent_state(state); + } + } +} + /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -940,10 +951,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, } int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) + struct extent_state **cached_state, gfp_t mask) { - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - NULL, mask); + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, + NULL, cached_state, mask); } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, @@ -1012,8 +1023,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, mask); } -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, mask); @@ -1735,6 +1745,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; + struct extent_state *cached = NULL; + struct extent_state *state; + tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1749,9 +1762,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, 0); + if (state) { + /* + * take a reference on the state, unlock will drop + * the ref + */ + cache_state(state, &cached); + } + spin_unlock(&tree->lock); + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - NULL); + state); if (ret) uptodate = 0; } @@ -1764,15 +1788,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + uncache_state(&cached); continue; } } if (uptodate) { - set_extent_uptodate(tree, start, end, + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); } - unlock_extent(tree, start, end, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); if (whole_page) { if (uptodate) { @@ -1811,6 +1836,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) do { struct page *page = bvec->bv_page; + struct extent_state *cached = NULL; tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1821,13 +1847,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); + set_extent_uptodate(tree, start, end, &cached, + GFP_ATOMIC); } else { ClearPageUptodate(page); SetPageError(page); } - unlock_extent(tree, start, end, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); } while (bvec >= bio->bi_io_vec); @@ -2016,14 +2043,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, while (cur <= end) { if (cur >= last_byte) { char *userpage; + struct extent_state *cached = NULL; + iosize = PAGE_CACHE_SIZE - page_offset; userpage = kmap_atomic(page, KM_USER0); memset(userpage + page_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + &cached, GFP_NOFS); + unlock_extent_cached(tree, cur, cur + iosize - 1, + &cached, GFP_NOFS); break; } em = get_extent(inode, page, page_offset, cur, @@ -2063,14 +2093,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { char *userpage; + struct extent_state *cached = NULL; + userpage = kmap_atomic(page, KM_USER0); memset(userpage + page_offset, 0, iosize); flush_dcache_page(page); kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + &cached, GFP_NOFS); + unlock_extent_cached(tree, cur, cur + iosize - 1, + &cached, GFP_NOFS); cur = cur + iosize; page_offset += iosize; continue; @@ -2789,9 +2822,12 @@ int extent_prepare_write(struct extent_io_tree *tree, iocount++; block_start = block_start + iosize; } else { - set_extent_uptodate(tree, block_start, cur_end, + struct extent_state *cached = NULL; + + set_extent_uptodate(tree, block_start, cur_end, &cached, GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); + unlock_extent_cached(tree, block_start, cur_end, + &cached, GFP_NOFS); block_start = cur_end + 1; } page_offset = block_start & (PAGE_CACHE_SIZE - 1); @@ -3457,7 +3493,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); + NULL, GFP_NOFS); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f62c5442835d..af2d7179c372 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int exclusive_bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask); + struct extent_state **cached_state, gfp_t mask); int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index edafc28883af..5a993e0ec865 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5226,7 +5226,7 @@ again: btrfs_mark_buffer_dirty(leaf); } set_extent_uptodate(io_tree, em->start, - extent_map_end(em) - 1, GFP_NOFS); + extent_map_end(em) - 1, NULL, GFP_NOFS); goto insert; } else { printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); From 9d90e49da57fe73a2f35334fdd2fb60dbf3933ed Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 8 Apr 2011 11:23:00 -0700 Subject: [PATCH 039/200] x86/mrst: Fix boot crash caused by incorrect pin to irq mapping Moorestown systems crash on boot because the secondary CPU clockevent (apbt1) will fail to request irq#1, which does not have ioapic chip in its irq_desc[] entry. Background: Moorestown platform does not have ISA bus nor legacy IRQs. It reuses the range of legacy IRQs for regular device interrupts. The routing information of early system device IRQs (timers) are obtained from firmware provided SFI tables. We reuse/fake MP configuration table to facilitate IRQ setup with IOAPIC. Maintaining a 1:1 mapping of IOAPIC pin (RTE entry) and IRQ# makes routing information clean and easy to understand on Moorestown. Though optional. This patch allows SFI timer and vRTC IRQ to be treated as ISA IRQ so that pin2irq mapping will be 1:1. Also fixed MP table type and use macros to clearly set MP IRQ entries. As a result, apbt timer and RTC interrupts on Moorestown are within legacy IRQ range: # cat /proc/interrupts CPU0 CPU1 0: 11249 0 IO-APIC-edge apbt0 1: 0 12271 IO-APIC-edge apbt1 8: 887 0 IO-APIC-fasteoi dw_spi 13: 0 0 IO-APIC-fasteoi INTEL_MID_DMAC2 14: 0 0 IO-APIC-fasteoi rtc0 Further discussion of this patch can be found at: https://lkml.org/lkml/2010/6/10/70 Suggested-by: "Eric W. Biederman" Signed-off-by: Jacob Pan Cc: Feng Tang Cc: Alan Cox Cc: Arjan van de Ven Link: http://lkml.kernel.org/r/1302286980-21139-1-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/mrst/mrst.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 5c0207bf959b..275dbc19e2cf 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -97,11 +97,11 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table) pentry->freq_hz, pentry->irq); if (!pentry->irq) continue; - mp_irq.type = MP_IOAPIC; + mp_irq.type = MP_INTSRC; mp_irq.irqtype = mp_INT; /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ mp_irq.irqflag = 5; - mp_irq.srcbus = 0; + mp_irq.srcbus = MP_BUS_ISA; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; @@ -168,10 +168,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->irq); - mp_irq.type = MP_IOAPIC; + mp_irq.type = MP_INTSRC; mp_irq.irqtype = mp_INT; mp_irq.irqflag = 0xf; /* level trigger and active low */ - mp_irq.srcbus = 0; + mp_irq.srcbus = MP_BUS_ISA; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; @@ -282,7 +282,7 @@ void __init x86_mrst_early_setup(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_smp_config = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; - + set_bit(MP_BUS_ISA, mp_bus_not_pci); } /* From c68b2081250a87ba9b9a173faa5a600cc684602e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Apr 2011 23:50:02 -0700 Subject: [PATCH 040/200] Input: twl4030_keypad - fix potential NULL dereference in twl4030_kp_probe() We should first check whether platform data is NULL or not, before dereferencing it to get the keymap. Signed-off-by: Axel Lin Reviewed-by: Felipe Balbi Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/twl4030_keypad.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c index cc06c4b2f920..a26922cf0e84 100644 --- a/drivers/input/keyboard/twl4030_keypad.c +++ b/drivers/input/keyboard/twl4030_keypad.c @@ -332,7 +332,7 @@ static int __devinit twl4030_kp_program(struct twl4030_keypad *kp) static int __devinit twl4030_kp_probe(struct platform_device *pdev) { struct twl4030_keypad_data *pdata = pdev->dev.platform_data; - const struct matrix_keymap_data *keymap_data = pdata->keymap_data; + const struct matrix_keymap_data *keymap_data; struct twl4030_keypad *kp; struct input_dev *input; u8 reg; @@ -344,6 +344,8 @@ static int __devinit twl4030_kp_probe(struct platform_device *pdev) return -EINVAL; } + keymap_data = pdata->keymap_data; + kp = kzalloc(sizeof(*kp), GFP_KERNEL); input = input_allocate_device(); if (!kp || !input) { From d9c97833179036408e53ef5f3f5c7eaf781769bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:06:33 +0200 Subject: [PATCH 041/200] block: remove block_unplug_timer() trace point We no longer have an unplug timer running, so no point in keeping the trace point. Signed-off-by: Jens Axboe --- include/trace/events/block.h | 14 -------------- kernel/trace/blktrace.c | 17 ----------------- 2 files changed, 31 deletions(-) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 78f18adb49c8..43a985390bb6 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -418,20 +418,6 @@ DECLARE_EVENT_CLASS(block_unplug, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -/** - * block_unplug_timer - timed release of operations requests in queue to device driver - * @q: request queue to unplug - * - * Unplug the request queue @q because a timer expired and allow block - * operation requests to be sent to the device driver. - */ -DEFINE_EVENT(block_unplug, block_unplug_timer, - - TP_PROTO(struct request_queue *q), - - TP_ARGS(q) -); - /** * block_unplug_io - release of operations requests in request queue * @q: request queue to unplug diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7aa40f8e182d..824708cbfb7b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -863,19 +863,6 @@ static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) } } -static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) -{ - struct blk_trace *bt = q->blk_trace; - - if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); - - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, - sizeof(rpdu), &rpdu); - } -} - static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) @@ -1015,8 +1002,6 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); - WARN_ON(ret); ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); @@ -1033,7 +1018,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); - unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); @@ -1348,7 +1332,6 @@ static const struct { [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, - [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, From 94b5eb28b41cc79d9713696e0005ae167b5afd1b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:12:19 +0200 Subject: [PATCH 042/200] block: fixup block IO unplug trace call It was removed with the on-stack plugging, readd it and track the depth of requests added when flushing the plug. Signed-off-by: Jens Axboe --- block/blk-core.c | 15 +++++++++++++-- include/trace/events/block.h | 11 ++++++----- kernel/trace/blktrace.c | 6 +++--- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index eeaca0998df5..d20ce1e849c8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2668,12 +2668,19 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } +static void queue_unplugged(struct request_queue *q, unsigned int depth) +{ + trace_block_unplug_io(q, depth); + __blk_run_queue(q, false); +} + static void flush_plug_list(struct blk_plug *plug) { struct request_queue *q; unsigned long flags; struct request *rq; LIST_HEAD(list); + unsigned int depth; BUG_ON(plug->magic != PLUG_MAGIC); @@ -2688,6 +2695,7 @@ static void flush_plug_list(struct blk_plug *plug) } q = NULL; + depth = 0; local_irq_save(flags); while (!list_empty(&list)) { rq = list_entry_rq(list.next); @@ -2696,10 +2704,11 @@ static void flush_plug_list(struct blk_plug *plug) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - __blk_run_queue(q, false); + queue_unplugged(q, depth); spin_unlock(q->queue_lock); } q = rq->q; + depth = 0; spin_lock(q->queue_lock); } rq->cmd_flags &= ~REQ_ON_PLUG; @@ -2711,10 +2720,12 @@ static void flush_plug_list(struct blk_plug *plug) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); + + depth++; } if (q) { - __blk_run_queue(q, false); + queue_unplugged(q, depth); spin_unlock(q->queue_lock); } diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 43a985390bb6..006e60b58306 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -401,9 +401,9 @@ TRACE_EVENT(block_plug, DECLARE_EVENT_CLASS(block_unplug, - TP_PROTO(struct request_queue *q), + TP_PROTO(struct request_queue *q, unsigned int depth), - TP_ARGS(q), + TP_ARGS(q, depth), TP_STRUCT__entry( __field( int, nr_rq ) @@ -411,7 +411,7 @@ DECLARE_EVENT_CLASS(block_unplug, ), TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -421,15 +421,16 @@ DECLARE_EVENT_CLASS(block_unplug, /** * block_unplug_io - release of operations requests in request queue * @q: request queue to unplug + * @depth: number of requests just added to the queue * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug_io, - TP_PROTO(struct request_queue *q), + TP_PROTO(struct request_queue *q, unsigned int depth), - TP_ARGS(q) + TP_ARGS(q, depth) ); /** diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 824708cbfb7b..3e3970d53d14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -850,13 +850,13 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); } -static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) +static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, + unsigned int depth) { struct blk_trace *bt = q->blk_trace; if (bt) { - unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; - __be64 rpdu = cpu_to_be64(pdu); + __be64 rpdu = cpu_to_be64(depth); __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, sizeof(rpdu), &rpdu); From 188112722cce083c8f1a7d0d84f55c2cd885920c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:11:24 +0200 Subject: [PATCH 043/200] block: add comment on why we save and disable interrupts in flush_plug_list() It's done at the top to avoid doing it for every queue we unplug. Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index d20ce1e849c8..0c0ea10e61ea 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2696,6 +2696,11 @@ static void flush_plug_list(struct blk_plug *plug) q = NULL; depth = 0; + + /* + * Save and disable interrupts here, to avoid doing it for every + * queue lock we have to take. + */ local_irq_save(flags); while (!list_empty(&list)) { rq = list_entry_rq(list.next); From f75664570d8b75469cc468f23c2b27220984983b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:17:31 +0200 Subject: [PATCH 044/200] block: add callback function for unplug notification MD would like to know when a queue is unplugged, so it can flush it's bitmap writes. Add such a callback. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +++ block/blk-settings.c | 16 ++++++++++++++++ include/linux/blkdev.h | 3 +++ 3 files changed, 22 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 0c0ea10e61ea..76850fc9cf23 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2672,6 +2672,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth) { trace_block_unplug_io(q, depth); __blk_run_queue(q, false); + + if (q->unplugged_fn) + q->unplugged_fn(q); } static void flush_plug_list(struct blk_plug *plug) diff --git a/block/blk-settings.c b/block/blk-settings.c index 1fa769293597..eb949045bb12 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,6 +790,22 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); +/** + * blk_queue_unplugged - register a callback for an unplug event + * @q: the request queue for the device + * @fn: the function to call + * + * Some stacked drivers may need to know when IO is dispatched on an + * unplug event. By registrering a callback here, they will be notified + * when someone flushes their on-stack queue plug. The function will be + * called with the queue lock held. + */ +void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) +{ + q->unplugged_fn = fn; +} +EXPORT_SYMBOL(blk_queue_unplugged); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32176cc8e715..c07ffafac5d4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,6 +196,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); +typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -283,6 +284,7 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -841,6 +843,7 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); From dc6d36c9710d1fed42d1bbe7d8e4f742abd844c6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:28:28 +0200 Subject: [PATCH 045/200] block: readd plug trace event This was removed with the queue plug state. But we can easily readd by checking if this is the first request going to this queue. It's good information to have when tracing to see how effective the plugging is. Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 76850fc9cf23..52e756c526be 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1311,7 +1311,15 @@ get_rq: plug = current->plug; if (plug) { - if (!plug->should_sort && !list_empty(&plug->list)) { + /* + * If this is the first request added after a plug, fire + * of a plug trace. If others have been added before, check + * if we have multiple devices in this plug. If so, make a + * note to sort the list before dispatch. + */ + if (list_empty(&plug->list)) + trace_block_plug(q); + else if (!plug->should_sort) { struct request *__rq; __rq = list_entry_rq(plug->list.prev); From cf82c798394cd443eed7d91f998b79a63f341e91 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 10:30:53 +0200 Subject: [PATCH 046/200] block: kill queue_sync_plugs() The original use for this dates back to when we had to track write requests for serializing around barriers. That's not needed anymore, so kill it. Signed-off-by: Jens Axboe --- block/blk-core.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 52e756c526be..c6eaa1f56466 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -198,19 +198,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg) } EXPORT_SYMBOL(blk_dump_rq_flags); -/* - * Make sure that plugs that were pending when this function was entered, - * are now complete and requests pushed to the queue. -*/ -static inline void queue_sync_plugs(struct request_queue *q) -{ - /* - * If the current process is plugged and has barriers submitted, - * we will livelock if we don't unplug first. - */ - blk_flush_plug(current); -} - static void blk_delay_work(struct work_struct *work) { struct request_queue *q; @@ -298,7 +285,6 @@ void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->timeout); cancel_delayed_work_sync(&q->delay_work); - queue_sync_plugs(q); } EXPORT_SYMBOL(blk_sync_queue); From ef1fd2df85f5ea8ca8876a45d0ebb152d3a144d1 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 31 Mar 2011 12:31:09 +0530 Subject: [PATCH 047/200] powerpc/85xx: Don't add disabled PCIe devices PCIe nodes with the property status="disabled" are not usable and so avoid adding "disabled" PCIe bridge with the system. Signed-off-by: Prabhakar Kushwaha Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index f8f7f28c6343..68ca9290df94 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -324,6 +324,11 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary) struct resource rsrc; const int *bus_range; + if (!of_device_is_available(dev)) { + pr_warning("%s: disabled\n", dev->full_name); + return -ENODEV; + } + pr_debug("Adding PCI host bridge %s\n", dev->full_name); /* Fetch host bridge registers address */ From 07d9fce24d871785dbd25458469032fea73f17b8 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Wed, 6 Apr 2011 12:56:29 +0530 Subject: [PATCH 048/200] powerpc: Check device status before adding serial device serial port nodes with the property status="disabled" are not usable and so avoid adding "disabled" port with the system. Signed-off-by: Prabhakar Kushwaha Signed-off-by: Kumar Gala --- arch/powerpc/kernel/legacy_serial.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c834757bebc0..2b97b80d6d7d 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -330,9 +330,11 @@ void __init find_legacy_serial_ports(void) if (!parent) continue; if (of_match_node(legacy_serial_parents, parent) != NULL) { - index = add_legacy_soc_port(np, np); - if (index >= 0 && np == stdout) - legacy_serial_console = index; + if (of_device_is_available(np)) { + index = add_legacy_soc_port(np, np); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + } } of_node_put(parent); } From 11ed0db9f6c7811233632d2ab79c50c011b89902 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 6 Apr 2011 00:11:06 -0500 Subject: [PATCH 049/200] powerpc/book3e: Fix CPU feature handling on 64-bit e5500 The CPU_FTRS_POSSIBLE and CPU_FTRS_ALWAYS defines did not encompass e5500 CPU features when built for 64-bit. This causes issues with cpu_has_feature() as it utilizes the POSSIBLE & ALWAYS defines as part of its check. Create a unique CPU_FTRS_E5500 (as its different from CPU_FTRS_E500MC), created a new group for 64-bit Book3e based CPUs and add CPU_FTRS_E5500 to that group. Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/cputable.h | 13 +++++++++++++ arch/powerpc/kernel/cputable.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index be3cdf9134ce..f1fbf6092ed2 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -386,6 +386,9 @@ extern const char *powerpc_base_platform; CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL) +#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ + CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ @@ -435,11 +438,15 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) #ifdef __powerpc64__ +#ifdef CONFIG_PPC_BOOK3E +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E5500) +#else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ CPU_FTR_1T_SEGMENT | CPU_FTR_VSX) +#endif #else enum { CPU_FTRS_POSSIBLE = @@ -473,16 +480,21 @@ enum { #endif #ifdef CONFIG_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC | + CPU_FTRS_E5500 | #endif 0, }; #endif /* __powerpc64__ */ #ifdef __powerpc64__ +#ifdef CONFIG_PPC_BOOK3E +#define CPU_FTRS_ALWAYS (CPU_FTRS_E5500) +#else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 & \ CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE) +#endif #else enum { CPU_FTRS_ALWAYS = @@ -513,6 +525,7 @@ enum { #endif #ifdef CONFIG_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC & + CPU_FTRS_E5500 & #endif CPU_FTRS_POSSIBLE, }; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c9b68d07ac4f..b9602ee06deb 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1973,7 +1973,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, .cpu_name = "e5500", - .cpu_features = CPU_FTRS_E500MC, + .cpu_features = CPU_FTRS_E5500, .cpu_user_features = COMMON_USER_BOOKE, .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, From d51ad91535b75c043f074f093ef913fe20ff2b5e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 27 May 2010 17:35:12 -0500 Subject: [PATCH 050/200] powerpc/e500mc: Remove CPU_FTR_MAYBE_CAN_NAP/CPU_FTR_MAYBE_CAN_DOZE e500mc does not support the HID0/MSR mechanism that is used by e500_idle (and there are also issues with waking on certain types of interrupts). Further, even if napping is never actually enabled, just having CPU_FTR_CAN_NAP will cause machine_init() to overwrite the board's supplied ppc_md.power_save(). We drop CPU_FTR_MAYBE_CAN_DOZE becuase we should use 'wait' instead on e500mc. Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/cputable.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index f1fbf6092ed2..1833d1a07e79 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -382,8 +382,7 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_E500MC (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ - CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ From e5462d16f76ad7a9156a82a97fbafba298da9ca6 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 8 Apr 2011 04:20:54 -0500 Subject: [PATCH 051/200] powerpc/85xx: disable Suspend support if SMP enabled We currently dont have CPU Hotplug support working on 85xx so we need to disable Suspsend support as it will force enabling of CPU Hotplug. arch/powerpc/kernel/built-in.o: In function `cpu_die': arch/powerpc/kernel/smp.c:702: undefined reference to `start_secondary_resume' make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Kumar Gala --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b6ff882f695b..8f4d50b0adfa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -209,7 +209,7 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \ - PPC_85xx || PPC_86xx || PPC_PSERIES || 44x || 40x + (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x config PPC_DCR_NATIVE bool From f4af3c3d077a004762aaad052049c809fd8c6f0c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2011 14:58:51 +0200 Subject: [PATCH 052/200] block: move queue run on unplug to kblockd There are worries that we are now consuming a lot more stack in some cases, since we potentially call into IO dispatch from schedule() or io_schedule(). We can reduce this problem by moving the running of the queue to kblockd, like the old plugging scheme did as well. This may or may not be a good idea from a performance perspective, depending on how many tasks have queue plugs running at the same time. For even the slightly contended case, doing just a single queue run from kblockd instead of multiple runs directly from the unpluggers will be faster. Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index c6eaa1f56466..36b1a7559f94 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2665,7 +2665,7 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) static void queue_unplugged(struct request_queue *q, unsigned int depth) { trace_block_unplug_io(q, depth); - __blk_run_queue(q, false); + __blk_run_queue(q, true); if (q->unplugged_fn) q->unplugged_fn(q); From a6d710fefd1b2c209353a452d0f4c831b3af0da0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 22 Mar 2011 13:09:50 +0100 Subject: [PATCH 053/200] ARM: pxafb: Fix access to nonexistent member of pxafb_info In case CONFIG_FB_PXA_OVERLAY is not defined, the pxafb_freq_transition() function tests nonexistent member of pxafb_info (since the member is not part of the structure). Fix this by wraping the test in ifdef, even if I don't really like how the code looks now. The check doesn't have to happen if overlays are disabled at all as the check is always true then. Signed-off-by: Marek Vasut Acked-by: Vasily Khoruzhick Signed-off-by: Eric Miao --- drivers/video/pxafb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index a2e5b5100ab4..0f4e8c942f9e 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -1648,7 +1648,9 @@ pxafb_freq_transition(struct notifier_block *nb, unsigned long val, void *data) switch (val) { case CPUFREQ_PRECHANGE: - if (!fbi->overlay[0].usage && !fbi->overlay[1].usage) +#ifdef CONFIG_FB_PXA_OVERLAY + if (!(fbi->overlay[0].usage || fbi->overlay[1].usage)) +#endif set_ctrlr_state(fbi, C_DISABLE_CLKCHANGE); break; From af21cbb1ef6aa366fcb629ea4d4330300ba22de7 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 1 Apr 2011 13:28:45 +0400 Subject: [PATCH 054/200] pcmcia: limit pxa2xx_balloon3 subdriver to balloon3 platform pxa2xx_balloon3 tries to register pxa2xx-pcmcia device not checking whether machine is really balloon3, thus messing multi-machine kernels. Fix it up. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Eric Miao --- drivers/pcmcia/pxa2xx_balloon3.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pcmcia/pxa2xx_balloon3.c b/drivers/pcmcia/pxa2xx_balloon3.c index 453c54c97612..4c3e94c0ae85 100644 --- a/drivers/pcmcia/pxa2xx_balloon3.c +++ b/drivers/pcmcia/pxa2xx_balloon3.c @@ -25,6 +25,8 @@ #include +#include + #include "soc_common.h" /* @@ -127,6 +129,9 @@ static int __init balloon3_pcmcia_init(void) { int ret; + if (!machine_is_balloon3()) + return -ENODEV; + balloon3_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); if (!balloon3_pcmcia_device) return -ENOMEM; From 735443fede138fbbad062ec258e4d49648a0a56d Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 1 Apr 2011 13:28:46 +0400 Subject: [PATCH 055/200] pcmcia: limit pxa2xx_trizeps4 subdriver to trizeps4 platform pxa2xx_trizeps4 tries to register pxa2xx-pcmcia device not checking whether machine is really trizeps4, thus messing multi-machine kernels. Fix it up. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Eric Miao --- drivers/pcmcia/pxa2xx_trizeps4.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pcmcia/pxa2xx_trizeps4.c b/drivers/pcmcia/pxa2xx_trizeps4.c index b7e596620db1..5bb4e1a94d25 100644 --- a/drivers/pcmcia/pxa2xx_trizeps4.c +++ b/drivers/pcmcia/pxa2xx_trizeps4.c @@ -226,6 +226,9 @@ static int __init trizeps_pcmcia_init(void) { int ret; + if (!machine_is_trizeps4() && !machine_is_trizeps4wl()) + return -ENODEV; + trizeps_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); if (!trizeps_pcmcia_device) return -ENOMEM; From 6932613060b77e2495843d0ce0ce8453d01961a5 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 8 Apr 2011 20:15:39 +0800 Subject: [PATCH 056/200] ARM: pxa: always clear LPM bits for PXA168 MFPR Bit[9:7] should always be zero in PXA168. Signed-off-by: Haojian Zhuang Signed-off-by: Eric Miao --- arch/arm/mach-mmp/include/mach/mfp-pxa168.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h index 4621067c7720..713be155a44d 100644 --- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h +++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h @@ -8,6 +8,15 @@ #define MFP_DRIVE_MEDIUM (0x2 << 13) #define MFP_DRIVE_FAST (0x3 << 13) +#undef MFP_CFG +#undef MFP_CFG_DRV + +#define MFP_CFG(pin, af) \ + (MFP_LPM_INPUT | MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_DRIVE_MEDIUM) + +#define MFP_CFG_DRV(pin, af, drv) \ + (MFP_LPM_INPUT | MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_DRIVE_##drv) + /* GPIO */ #define GPIO0_GPIO MFP_CFG(GPIO0, AF5) #define GPIO1_GPIO MFP_CFG(GPIO1, AF5) From a0a4dcbca73a418477f439e812193e2591b46751 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 8 Apr 2011 20:15:42 +0800 Subject: [PATCH 057/200] ARM: pxa: align NR_BUILTIN_GPIO with GPIO interrupt number Avoid to mismatch between NR_BUILTIN_GPIO and GPIO interrupt number Signed-off-by: Haojian Zhuang Signed-off-by: Eric Miao --- arch/arm/mach-pxa/include/mach/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/include/mach/gpio.h b/arch/arm/mach-pxa/include/mach/gpio.h index b024a8b37439..48ef925ffb3b 100644 --- a/arch/arm/mach-pxa/include/mach/gpio.h +++ b/arch/arm/mach-pxa/include/mach/gpio.h @@ -99,7 +99,7 @@ #define GAFR(x) GPIO_REG(0x54 + (((x) & 0x70) >> 2)) -#define NR_BUILTIN_GPIO 128 +#define NR_BUILTIN_GPIO PXA_GPIO_IRQ_NUM #define gpio_to_bank(gpio) ((gpio) >> 5) #define gpio_to_irq(gpio) IRQ_GPIO(gpio) From 83fd6c685bd8e83be1e29e2841bab94dd831e186 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 8 Apr 2011 20:15:43 +0800 Subject: [PATCH 058/200] ARM: mmp: align NR_BUILTIN_GPIO with gpio interrupt number Avoid to mismatch between NR_BUILTIN_GPIO and gpio interrupt number. Signed-off-by: Haojian Zhuang Signed-off-by: Eric Miao --- arch/arm/mach-mmp/include/mach/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-mmp/include/mach/gpio.h b/arch/arm/mach-mmp/include/mach/gpio.h index ee8b02ed8011..7bfb827f3fe3 100644 --- a/arch/arm/mach-mmp/include/mach/gpio.h +++ b/arch/arm/mach-mmp/include/mach/gpio.h @@ -10,7 +10,7 @@ #define BANK_OFF(n) (((n) < 3) ? (n) << 2 : 0x100 + (((n) - 3) << 2)) #define GPIO_REG(x) (*((volatile u32 *)(GPIO_REGS_VIRT + (x)))) -#define NR_BUILTIN_GPIO (192) +#define NR_BUILTIN_GPIO IRQ_GPIO_NUM #define gpio_to_bank(gpio) ((gpio) >> 5) #define gpio_to_irq(gpio) (IRQ_GPIO_START + (gpio)) From 109b36a2bb3eebf5c9994980e724958a5b2b62b6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Apr 2011 13:57:39 -0400 Subject: [PATCH 059/200] Btrfs: make uncache_state unconditional The extent_io code can take cached pointers into the extent state trees, and these can make lookups much faster in common operations. The caching only happens when specific bits are set that prevent merging and splitting of the extent state. A help function was added to uncache the state, and it was testing the same set of conditionals. This can leak in very strange corner cases where the lock bit goes away unexpectedly. The uncaching should be unconditional. Once we have a ref on the extent we should always give it up. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8dcfb77678de..1c462f895c98 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -694,10 +694,8 @@ static void uncache_state(struct extent_state **cached_ptr) { if (cached_ptr && (*cached_ptr)) { struct extent_state *state = *cached_ptr; - if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { - *cached_ptr = NULL; - free_extent_state(state); - } + *cached_ptr = NULL; + free_extent_state(state); } } @@ -1764,7 +1762,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) spin_lock(&tree->lock); state = find_first_extent_bit_state(tree, start, 0); - if (state) { + if (state && state->start == start) { /* * take a reference on the state, unlock will drop * the ref From 7db6a7fa09884b34d2a5d4e6e4ed58664a5f0cf8 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Tue, 12 Apr 2011 18:39:39 +0800 Subject: [PATCH 060/200] ARM: pxa: convert incorrect IRQ_TO_IRQ() to irq_to_gpio() This fixes the failure to register the IRQ_RTCAlrm alarm as a wakeup event. It is misinterpreted as a gpio irq not a PWER bitmask. Fixed this by converting the incorrect IRQ_TO_IRQ() to a correct version of irq_to_gpio(). Reported-by: Nick Bane Signed-off-by: Eric Miao --- arch/arm/mach-pxa/include/mach/gpio.h | 15 ++++++++++++++- arch/arm/mach-pxa/include/mach/irqs.h | 3 --- arch/arm/mach-pxa/pxa25x.c | 2 +- arch/arm/mach-pxa/pxa27x.c | 2 +- drivers/pcmcia/pxa2xx_trizeps4.c | 12 ++++++------ 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-pxa/include/mach/gpio.h b/arch/arm/mach-pxa/include/mach/gpio.h index 48ef925ffb3b..c4639502efca 100644 --- a/arch/arm/mach-pxa/include/mach/gpio.h +++ b/arch/arm/mach-pxa/include/mach/gpio.h @@ -103,7 +103,20 @@ #define gpio_to_bank(gpio) ((gpio) >> 5) #define gpio_to_irq(gpio) IRQ_GPIO(gpio) -#define irq_to_gpio(irq) IRQ_TO_GPIO(irq) + +static inline int irq_to_gpio(unsigned int irq) +{ + int gpio; + + if (irq == IRQ_GPIO0 || irq == IRQ_GPIO1) + return irq - IRQ_GPIO0; + + gpio = irq - PXA_GPIO_IRQ_BASE; + if (gpio >= 2 && gpio < NR_BUILTIN_GPIO) + return gpio; + + return -1; +} #ifdef CONFIG_CPU_PXA26x /* GPIO86/87/88/89 on PXA26x have their direction bits in GPDR2 inverted, diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h index a4285fc00878..038402404e39 100644 --- a/arch/arm/mach-pxa/include/mach/irqs.h +++ b/arch/arm/mach-pxa/include/mach/irqs.h @@ -93,9 +93,6 @@ #define GPIO_2_x_TO_IRQ(x) (PXA_GPIO_IRQ_BASE + (x)) #define IRQ_GPIO(x) (((x) < 2) ? (IRQ_GPIO0 + (x)) : GPIO_2_x_TO_IRQ(x)) -#define IRQ_TO_GPIO_2_x(i) ((i) - PXA_GPIO_IRQ_BASE) -#define IRQ_TO_GPIO(i) (((i) < IRQ_GPIO(2)) ? ((i) - IRQ_GPIO0) : IRQ_TO_GPIO_2_x(i)) - /* * The following interrupts are for board specific purposes. Since * the kernel can only run on one machine at a time, we can re-use diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index 6bde5956358d..a4af8c52d7ee 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -285,7 +285,7 @@ static inline void pxa25x_init_pm(void) {} static int pxa25x_set_wake(struct irq_data *d, unsigned int on) { - int gpio = IRQ_TO_GPIO(d->irq); + int gpio = irq_to_gpio(d->irq); uint32_t mask = 0; if (gpio >= 0 && gpio < 85) diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 1cb5d0f9723f..909756eaf4b7 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -345,7 +345,7 @@ static inline void pxa27x_init_pm(void) {} */ static int pxa27x_set_wake(struct irq_data *d, unsigned int on) { - int gpio = IRQ_TO_GPIO(d->irq); + int gpio = irq_to_gpio(d->irq); uint32_t mask; if (gpio >= 0 && gpio < 128) diff --git a/drivers/pcmcia/pxa2xx_trizeps4.c b/drivers/pcmcia/pxa2xx_trizeps4.c index 5bb4e1a94d25..b829e655457b 100644 --- a/drivers/pcmcia/pxa2xx_trizeps4.c +++ b/drivers/pcmcia/pxa2xx_trizeps4.c @@ -69,15 +69,15 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt) for (i = 0; i < ARRAY_SIZE(irqs); i++) { if (irqs[i].sock != skt->nr) continue; - if (gpio_request(IRQ_TO_GPIO(irqs[i].irq), irqs[i].str) < 0) { + if (gpio_request(irq_to_gpio(irqs[i].irq), irqs[i].str) < 0) { pr_err("%s: sock %d unable to request gpio %d\n", - __func__, skt->nr, IRQ_TO_GPIO(irqs[i].irq)); + __func__, skt->nr, irq_to_gpio(irqs[i].irq)); ret = -EBUSY; goto error; } - if (gpio_direction_input(IRQ_TO_GPIO(irqs[i].irq)) < 0) { + if (gpio_direction_input(irq_to_gpio(irqs[i].irq)) < 0) { pr_err("%s: sock %d unable to set input gpio %d\n", - __func__, skt->nr, IRQ_TO_GPIO(irqs[i].irq)); + __func__, skt->nr, irq_to_gpio(irqs[i].irq)); ret = -EINVAL; goto error; } @@ -86,7 +86,7 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt) error: for (; i >= 0; i--) { - gpio_free(IRQ_TO_GPIO(irqs[i].irq)); + gpio_free(irq_to_gpio(irqs[i].irq)); } return (ret); } @@ -97,7 +97,7 @@ static void trizeps_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) /* free allocated gpio's */ gpio_free(GPIO_PRDY); for (i = 0; i < ARRAY_SIZE(irqs); i++) - gpio_free(IRQ_TO_GPIO(irqs[i].irq)); + gpio_free(irq_to_gpio(irqs[i].irq)); } static unsigned long trizeps_pcmcia_status[2]; From 2e6a00356a066d34cd00872b067589549169ad48 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 17 Mar 2011 15:17:59 +0800 Subject: [PATCH 061/200] Btrfs: Check if btrfs_next_leaf() returns error in btrfs_listxattr() btrfs_next_leaf() can return -errno, and we should propagate it to userspace. This also simplifies how we walk the btree path. Signed-off-by: Li Zefan --- fs/btrfs/xattr.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index e5d22f280956..07b9bc350d5d 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_dir_item *di; - int ret = 0, slot, advance; + int ret = 0, slot; size_t total_size = 0, size_left = size; unsigned long name_ptr; size_t name_len; - u32 nritems; /* * ok we want all objects associated with this id. @@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - advance = 0; + while (1) { leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; /* this is where we start walking through the path */ - if (advance || slot >= nritems) { + if (slot >= btrfs_header_nritems(leaf)) { /* * if we've reached the last slot in this leaf we need * to go to the next leaf and reset everything */ - if (slot >= nritems-1) { - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - slot = path->slots[0]; - } else { - /* - * just walking through the slots on this leaf - */ - slot++; - path->slots[0]++; - } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; + else if (ret > 0) + break; + continue; } - advance = 1; btrfs_item_key_to_cpu(leaf, &found_key, slot); @@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* we are just looking for how big our buffer needs to be */ if (!size) - continue; + goto next; if (!buffer || (name_len + 1) > size_left) { ret = -ERANGE; @@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) size_left -= name_len + 1; buffer += name_len + 1; +next: + path->slots[0]++; } ret = total_size; From b9e03af0bcc11310f6be4a3951c9ee2c26465011 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 23 Mar 2011 10:43:58 +0800 Subject: [PATCH 062/200] Btrfs: Check if btrfs_next_leaf() returns error in btrfs_real_readdir() btrfs_next_leaf() can return -errno, and we should propagate it to userspace. This also simplifies how we walk the btree path. Signed-off-by: Li Zefan --- fs/btrfs/inode.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 55a6a0b416d7..b9f7f5258343 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4221,10 +4221,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_key found_key; struct btrfs_path *path; int ret; - u32 nritems; struct extent_buffer *leaf; int slot; - int advance; unsigned char d_type; int over = 0; u32 di_cur; @@ -4267,27 +4265,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - advance = 0; while (1) { leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; - if (advance || slot >= nritems) { - if (slot >= nritems - 1) { - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - slot = path->slots[0]; - } else { - slot++; - path->slots[0]++; - } + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; + else if (ret > 0) + break; + continue; } - advance = 1; item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot); @@ -4296,7 +4286,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, if (btrfs_key_type(&found_key) != key_type) break; if (found_key.offset < filp->f_pos) - continue; + goto next; filp->f_pos = found_key.offset; @@ -4349,6 +4339,8 @@ skip: di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } +next: + path->slots[0]++; } /* Reached end of directory/root. Bump pos past the last item. */ From 3153495d8ed6a9bb9f00aea42c18dc488a885dd6 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 13 Apr 2011 13:19:21 +0800 Subject: [PATCH 063/200] Btrfs: Fix incorrect inode nlink in btrfs_link() Link count of the inode is not decreased if btrfs_set_inode_index() fails. Signed-off-by: Miao Xie Singed-off-by: Li Zefan --- fs/btrfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b9f7f5258343..a4157cfdd533 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4846,9 +4846,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == ~0U) return -EMLINK; - btrfs_inc_nlink(inode); - inode->i_ctime = CURRENT_TIME; - err = btrfs_set_inode_index(dir, &index); if (err) goto fail; @@ -4864,6 +4861,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; } + btrfs_inc_nlink(inode); + inode->i_ctime = CURRENT_TIME; + btrfs_set_trans_block_group(trans, dir); ihold(inode); From 329c5056be8774255db04b01242a9ff4f02eb8ea Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 13 Apr 2011 14:07:59 +0800 Subject: [PATCH 064/200] Btrfs: Check validity before setting an acl Call posix_acl_valid() to check if an acl is valid or not. Signed-off-by: Miao Xie Signed-off-by: Li Zefan --- fs/btrfs/acl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 9c949348510b..a892bc27f13a 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, if (value) { acl = posix_acl_from_xattr(value, size); - if (acl == NULL) { - value = NULL; - size = 0; + if (acl) { + ret = posix_acl_valid(acl); + if (ret) + goto out; } else if (IS_ERR(acl)) { return PTR_ERR(acl); } } ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type); - +out: posix_acl_release(acl); return ret; From ba6a078b77e0dc1309d7e6e2ee034b92ab91f88c Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 12 Apr 2011 23:13:08 -0700 Subject: [PATCH 065/200] Input: add KEY_IMAGES specifically for AL Image Browser Many media center remotes have buttons intended for jumping straight to one type of media browser or another -- commonly, images/photos/pictures, audio/music, television, and movies. At present, remotes with an images or photos or pictures button use any number of different keycodes which sort of maybe fit. I've seen at least KEY_MEDIA, KEY_CAMERA, KEY_GRAPHICSEDITOR and KEY_PRESENTATION. None of those seem quite right. In my mind, KEY_MEDIA should be something more like a media center application launcher (and I'd like to standardize on that for things like the windows media center button on the mce remotes). KEY_CAMERA is used in a lot of webcams, and typically means "take a picture now". KEY_GRAPHICSEDITOR implies an editor, not a browser. KEY_PRESENTATION might be the closest fit here, if you think "photo slide show", but it may well be more intended for "run application in full-screen presentation mode" or to launch something like magicpoint, I dunno. And thus, I'd like to have a KEY_IMAGES, which matches the HID Usage AL Image Browser, the meaning of which I think is crystal-clear. I believe AL Audio Browser is already covered by KEY_AUDIO, and AL Movie Browser by KEY_VIDEO, so I'm also adding appropriate comments next to those keys. Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/input.h b/include/linux/input.h index 056ae8a5bd9b..0cc25e4ce2ab 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -553,8 +553,8 @@ struct input_keymap_entry { #define KEY_DVD 0x185 /* Media Select DVD */ #define KEY_AUX 0x186 #define KEY_MP3 0x187 -#define KEY_AUDIO 0x188 -#define KEY_VIDEO 0x189 +#define KEY_AUDIO 0x188 /* AL Audio Browser */ +#define KEY_VIDEO 0x189 /* AL Movie Browser */ #define KEY_DIRECTORY 0x18a #define KEY_LIST 0x18b #define KEY_MEMO 0x18c /* Media Select Messages */ @@ -603,8 +603,9 @@ struct input_keymap_entry { #define KEY_FRAMEFORWARD 0x1b5 #define KEY_CONTEXT_MENU 0x1b6 /* GenDesc - system context menu */ #define KEY_MEDIA_REPEAT 0x1b7 /* Consumer - transport control */ -#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ -#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ +#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ +#define KEY_IMAGES 0x1ba /* AL Image Browser */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 From b1e064b81e238d47cb56544b34c9baf473e09837 Mon Sep 17 00:00:00 2001 From: Chase Douglas Date: Tue, 12 Apr 2011 23:29:07 -0700 Subject: [PATCH 066/200] Input: document event types and codes and their intended use This commit adds the file Documentation/input/event-codes.txt. Acked-by: Henrik Rydberg Reviewed-by: Peter Hutterer Signed-off-by: Chase Douglas Signed-off-by: Chris Bagwell Signed-off-by: Dmitry Torokhov --- Documentation/input/event-codes.txt | 256 ++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 Documentation/input/event-codes.txt diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt new file mode 100644 index 000000000000..f13aee550409 --- /dev/null +++ b/Documentation/input/event-codes.txt @@ -0,0 +1,256 @@ +The input protocol uses a map of types and codes to express input device values +to userspace. This document describes the types and codes and how and when they +may be used. + +A single hardware event generates multiple input events. Each input event +contains the new value of a single data item. A special event type, EV_SYN, is +used to separate input events into packets of input data changes occurring at +the same moment in time. In the following, the term "event" refers to a single +input event encompassing a type, code, and value. + +The input protocol is a stateful protocol. Events are emitted only when values +of event codes have changed. However, the state is maintained within the Linux +input subsystem; drivers do not need to maintain the state and may attempt to +emit unchanged values without harm. Userspace may obtain the current state of +event code values using the EVIOCG* ioctls defined in linux/input.h. The event +reports supported by a device are also provided by sysfs in +class/input/event*/device/capabilities/, and the properties of a device are +provided in class/input/event*/device/properties. + +Types: +========== +Types are groupings of codes under a logical input construct. Each type has a +set of applicable codes to be used in generating events. See the Codes section +for details on valid codes for each type. + +* EV_SYN: + - Used as markers to separate events. Events may be separated in time or in + space, such as with the multitouch protocol. + +* EV_KEY: + - Used to describe state changes of keyboards, buttons, or other key-like + devices. + +* EV_REL: + - Used to describe relative axis value changes, e.g. moving the mouse 5 units + to the left. + +* EV_ABS: + - Used to describe absolute axis value changes, e.g. describing the + coordinates of a touch on a touchscreen. + +* EV_MSC: + - Used to describe miscellaneous input data that do not fit into other types. + +* EV_SW: + - Used to describe binary state input switches. + +* EV_LED: + - Used to turn LEDs on devices on and off. + +* EV_SND: + - Used to output sound to devices. + +* EV_REP: + - Used for autorepeating devices. + +* EV_FF: + - Used to send force feedback commands to an input device. + +* EV_PWR: + - A special type for power button and switch input. + +* EV_FF_STATUS: + - Used to receive force feedback device status. + +Codes: +========== +Codes define the precise type of event. + +EV_SYN: +---------- +EV_SYN event values are undefined. Their usage is defined only by when they are +sent in the evdev event stream. + +* SYN_REPORT: + - Used to synchronize and separate events into packets of input data changes + occurring at the same moment in time. For example, motion of a mouse may set + the REL_X and REL_Y values for one motion, then emit a SYN_REPORT. The next + motion will emit more REL_X and REL_Y values and send another SYN_REPORT. + +* SYN_CONFIG: + - TBD + +* SYN_MT_REPORT: + - Used to synchronize and separate touch events. See the + multi-touch-protocol.txt document for more information. + +EV_KEY: +---------- +EV_KEY events take the form KEY_ or BTN_. For example, KEY_A is used +to represent the 'A' key on a keyboard. When a key is depressed, an event with +the key's code is emitted with value 1. When the key is released, an event is +emitted with value 0. Some hardware send events when a key is repeated. These +events have a value of 2. In general, KEY_ is used for keyboard keys, and +BTN_ is used for other types of momentary switch events. + +A few EV_KEY codes have special meanings: + +* BTN_TOOL_: + - These codes are used in conjunction with input trackpads, tablets, and + touchscreens. These devices may be used with fingers, pens, or other tools. + When an event occurs and a tool is used, the corresponding BTN_TOOL_ + code should be set to a value of 1. When the tool is no longer interacting + with the input device, the BTN_TOOL_ code should be reset to 0. All + trackpads, tablets, and touchscreens should use at least one BTN_TOOL_ + code when events are generated. + +* BTN_TOUCH: + BTN_TOUCH is used for touch contact. While an input tool is determined to be + within meaningful physical contact, the value of this property must be set + to 1. Meaningful physical contact may mean any contact, or it may mean + contact conditioned by an implementation defined property. For example, a + touchpad may set the value to 1 only when the touch pressure rises above a + certain value. BTN_TOUCH may be combined with BTN_TOOL_ codes. For + example, a pen tablet may set BTN_TOOL_PEN to 1 and BTN_TOUCH to 0 while the + pen is hovering over but not touching the tablet surface. + +Note: For appropriate function of the legacy mousedev emulation driver, +BTN_TOUCH must be the first evdev code emitted in a synchronization frame. + +Note: Historically a touch device with BTN_TOOL_FINGER and BTN_TOUCH was +interpreted as a touchpad by userspace, while a similar device without +BTN_TOOL_FINGER was interpreted as a touchscreen. For backwards compatibility +with current userspace it is recommended to follow this distinction. In the +future, this distinction will be deprecated and the device properties ioctl +EVIOCGPROP, defined in linux/input.h, will be used to convey the device type. + +* BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, BTN_TOOL_TRIPLETAP, BTN_TOOL_QUADTAP: + - These codes denote one, two, three, and four finger interaction on a + trackpad or touchscreen. For example, if the user uses two fingers and moves + them on the touchpad in an effort to scroll content on screen, + BTN_TOOL_DOUBLETAP should be set to value 1 for the duration of the motion. + Note that all BTN_TOOL_ codes and the BTN_TOUCH code are orthogonal in + purpose. A trackpad event generated by finger touches should generate events + for one code from each group. At most only one of these BTN_TOOL_ + codes should have a value of 1 during any synchronization frame. + +Note: Historically some drivers emitted multiple of the finger count codes with +a value of 1 in the same synchronization frame. This usage is deprecated. + +Note: In multitouch drivers, the input_mt_report_finger_count() function should +be used to emit these codes. Please see multi-touch-protocol.txt for details. + +EV_REL: +---------- +EV_REL events describe relative changes in a property. For example, a mouse may +move to the left by a certain number of units, but its absolute position in +space is unknown. If the absolute position is known, EV_ABS codes should be used +instead of EV_REL codes. + +A few EV_REL codes have special meanings: + +* REL_WHEEL, REL_HWHEEL: + - These codes are used for vertical and horizontal scroll wheels, + respectively. + +EV_ABS: +---------- +EV_ABS events describe absolute changes in a property. For example, a touchpad +may emit coordinates for a touch location. + +A few EV_ABS codes have special meanings: + +* ABS_DISTANCE: + - Used to describe the distance of a tool from an interaction surface. This + event should only be emitted while the tool is hovering, meaning in close + proximity of the device and while the value of the BTN_TOUCH code is 0. If + the input device may be used freely in three dimensions, consider ABS_Z + instead. + +* ABS_MT_: + - Used to describe multitouch input events. Please see + multi-touch-protocol.txt for details. + +EV_SW: +---------- +EV_SW events describe stateful binary switches. For example, the SW_LID code is +used to denote when a laptop lid is closed. + +Upon binding to a device or resuming from suspend, a driver must report +the current switch state. This ensures that the device, kernel, and userspace +state is in sync. + +Upon resume, if the switch state is the same as before suspend, then the input +subsystem will filter out the duplicate switch state reports. The driver does +not need to keep the state of the switch at any time. + +EV_MSC: +---------- +EV_MSC events are used for input and output events that do not fall under other +categories. + +EV_LED: +---------- +EV_LED events are used for input and output to set and query the state of +various LEDs on devices. + +EV_REP: +---------- +EV_REP events are used for specifying autorepeating events. + +EV_SND: +---------- +EV_SND events are used for sending sound commands to simple sound output +devices. + +EV_FF: +---------- +EV_FF events are used to initialize a force feedback capable device and to cause +such device to feedback. + +EV_PWR: +---------- +EV_PWR events are a special type of event used specifically for power +mangement. Its usage is not well defined. To be addressed later. + +Guidelines: +========== +The guidelines below ensure proper single-touch and multi-finger functionality. +For multi-touch functionality, see the multi-touch-protocol.txt document for +more information. + +Mice: +---------- +REL_{X,Y} must be reported when the mouse moves. BTN_LEFT must be used to report +the primary button press. BTN_{MIDDLE,RIGHT,4,5,etc.} should be used to report +further buttons of the device. REL_WHEEL and REL_HWHEEL should be used to report +scroll wheel events where available. + +Touchscreens: +---------- +ABS_{X,Y} must be reported with the location of the touch. BTN_TOUCH must be +used to report when a touch is active on the screen. +BTN_{MOUSE,LEFT,MIDDLE,RIGHT} must not be reported as the result of touch +contact. BTN_TOOL_ events should be reported where possible. + +Trackpads: +---------- +Legacy trackpads that only provide relative position information must report +events like mice described above. + +Trackpads that provide absolute touch position must report ABS_{X,Y} for the +location of the touch. BTN_TOUCH should be used to report when a touch is active +on the trackpad. Where multi-finger support is available, BTN_TOOL_ should +be used to report the number of touches active on the trackpad. + +Tablets: +---------- +BTN_TOOL_ events must be reported when a stylus or other tool is active on +the tablet. ABS_{X,Y} must be reported with the location of the tool. BTN_TOUCH +should be used to report when the tool is in contact with the tablet. +BTN_{STYLUS,STYLUS2} should be used to report buttons on the tool itself. Any +button may be used for buttons on the tablet except BTN_{MOUSE,LEFT}. +BTN_{0,1,2,etc} are good generic codes for unlabeled buttons. Do not use +meaningful buttons, like BTN_FORWARD, unless the button is labeled for that +purpose on the device. From 9fb0f14e31b6101a0cc69a333b43541044f9b0a6 Mon Sep 17 00:00:00 2001 From: Jeff Brown Date: Tue, 12 Apr 2011 23:29:38 -0700 Subject: [PATCH 067/200] Input: evdev - indicate buffer overrun with SYN_DROPPED Add a new EV_SYN code, SYN_DROPPED, to inform the client when input events have been dropped from the evdev input buffer due to a buffer overrun. The client should use this event as a hint to reset its state or ignore all following events until the next packet begins. Signed-off-by: Jeff Brown [dtor@mail.ru: Implement Henrik's suggestion and drop old events in case of overflow.] Signed-off-by: Dmitry Torokhov --- Documentation/input/event-codes.txt | 6 ++++++ drivers/input/evdev.c | 33 ++++++++++++++++++----------- include/linux/input.h | 1 + 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index f13aee550409..23fcb05175be 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -85,6 +85,12 @@ sent in the evdev event stream. - Used to synchronize and separate touch events. See the multi-touch-protocol.txt document for more information. +* SYN_DROPPED: + - Used to indicate buffer overrun in the evdev client's event queue. + Client should ignore all events up to and including next SYN_REPORT + event and query the device (using EVIOCG* ioctls) to obtain its + current state. + EV_KEY: ---------- EV_KEY events take the form KEY_ or BTN_. For example, KEY_A is used diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 7f42d3a454d2..88d8e4cb419a 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -39,13 +39,13 @@ struct evdev { }; struct evdev_client { - int head; - int tail; + unsigned int head; + unsigned int tail; spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; - int bufsize; + unsigned int bufsize; struct input_event buffer[]; }; @@ -55,16 +55,25 @@ static DEFINE_MUTEX(evdev_table_mutex); static void evdev_pass_event(struct evdev_client *client, struct input_event *event) { - /* - * Interrupts are disabled, just acquire the lock. - * Make sure we don't leave with the client buffer - * "empty" by having client->head == client->tail. - */ + /* Interrupts are disabled, just acquire the lock. */ spin_lock(&client->buffer_lock); - do { - client->buffer[client->head++] = *event; - client->head &= client->bufsize - 1; - } while (client->head == client->tail); + + client->buffer[client->head++] = *event; + client->head &= client->bufsize - 1; + + if (unlikely(client->head == client->tail)) { + /* + * This effectively "drops" all unconsumed events, leaving + * EV_SYN/SYN_DROPPED plus the newest event in the queue. + */ + client->tail = (client->head - 2) & (client->bufsize - 1); + + client->buffer[client->tail].time = event->time; + client->buffer[client->tail].type = EV_SYN; + client->buffer[client->tail].code = SYN_DROPPED; + client->buffer[client->tail].value = 0; + } + spin_unlock(&client->buffer_lock); if (event->type == EV_SYN) diff --git a/include/linux/input.h b/include/linux/input.h index 0cc25e4ce2ab..73a8c6ee595b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -167,6 +167,7 @@ struct input_keymap_entry { #define SYN_REPORT 0 #define SYN_CONFIG 1 #define SYN_MT_REPORT 2 +#define SYN_DROPPED 3 /* * Keys and buttons From 78530bf7f2559b317c04991b52217c1608d5a58d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 13 Apr 2011 10:31:52 +0300 Subject: [PATCH 068/200] UBIFS: fix oops when R/O file-system is fsync'ed This patch fixes severe UBIFS bug: UBIFS oopses when we 'fsync()' an file on R/O-mounter file-system. We (the UBIFS authors) incorrectly thought that VFS would not propagate 'fsync()' down to the file-system if it is read-only, but this is not the case. It is easy to exploit this bug using the following simple perl script: use strict; use File::Sync qw(fsync sync); die "File path is not specified" if not defined $ARGV[0]; my $path = $ARGV[0]; open FILE, "<", "$path" or die "Cannot open $path: $!"; fsync(\*FILE) or die "cannot fsync $path: $!"; close FILE or die "Cannot close $path: $!"; Thanks to Reuben Dowle for reporting about this issue. Signed-off-by: Artem Bityutskiy Reported-by: Reuben Dowle Cc: stable@kernel.org --- fs/ubifs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 28be1e6a65e8..b286db79c686 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1312,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync) dbg_gen("syncing inode %lu", inode->i_ino); + if (inode->i_sb->s_flags & MS_RDONLY) + return 0; + /* * VFS has already synchronized dirty pages for this inode. Synchronize * the inode unless this is a 'datasync()' call. From 100d4a9d20015315bf4215d11c2cf4b1f30c33b8 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Tue, 15 Mar 2011 16:24:24 +0100 Subject: [PATCH 069/200] usb: musb: clear AUTOSET while clearing DMAENAB On the completion of tx dma, dma is disabled by clearing MUSB_TXCSR_DMAENAB in TXCSR. If MUSB_TXCSR_AUTOSET was set in txstate() it will remain set although it is not needed in PIO mode. Clear it as soon as it is not needed. Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 98519c5d8b5c..9ecb057902ef 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -535,7 +535,7 @@ void musb_g_tx(struct musb *musb, u8 epnum) is_dma = 1; csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN | - MUSB_TXCSR_TXPKTRDY); + MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET); musb_writew(epio, MUSB_TXCSR, csr); /* Ensure writebuffer is empty. */ csr = musb_readw(epio, MUSB_TXCSR); From 8726606424738ca0341e1bb93ebac956d80f6d29 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Tue, 15 Mar 2011 16:24:29 +0100 Subject: [PATCH 070/200] usb: musb: ux500: copy dma mask from platform device to musb device musb code checks dma mask before calling dma hooks. Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi --- drivers/usb/musb/ux500.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c index d6384e4aeef9..f7e04bf34a13 100644 --- a/drivers/usb/musb/ux500.c +++ b/drivers/usb/musb/ux500.c @@ -93,6 +93,8 @@ static int __init ux500_probe(struct platform_device *pdev) } musb->dev.parent = &pdev->dev; + musb->dev.dma_mask = pdev->dev.dma_mask; + musb->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; glue->dev = &pdev->dev; glue->musb = musb; From ec63bf6c06b01ceeb6048a2b9fa9e73060259307 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 20 Mar 2011 14:14:36 +0300 Subject: [PATCH 071/200] USB: musb: add missing unlock in cppi_interrupt() We should unlock before returning here. Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/musb/cppi_dma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index de55a3c3259a..6385eeb44a07 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -1167,8 +1167,11 @@ irqreturn_t cppi_interrupt(int irq, void *dev_id) tx = musb_readl(tibase, DAVINCI_TXCPPI_MASKED_REG); rx = musb_readl(tibase, DAVINCI_RXCPPI_MASKED_REG); - if (!tx && !rx) + if (!tx && !rx) { + if (cppi->irq) + spin_unlock_irqrestore(&musb->lock, flags); return IRQ_NONE; + } DBG(4, "CPPI IRQ Tx%x Rx%x\n", tx, rx); From aca7f353219abfb7b8a1530fbba1b1acf0e30da4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 20 Mar 2011 14:15:24 +0300 Subject: [PATCH 072/200] USB: musb: using 0 instead of NULL Sparse complains (and rightly so): drivers/usb/musb/cppi_dma.c:1458:33: warning: Using plain integer as NULL pointer Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/musb/cppi_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index 6385eeb44a07..0db0f5e7d3a4 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -1455,7 +1455,7 @@ static int cppi_channel_abort(struct dma_channel *channel) * compare mode by writing 1 to the tx_complete register. */ cppi_reset_tx(tx_ram, 1); - cppi_ch->head = 0; + cppi_ch->head = NULL; musb_writel(&tx_ram->tx_complete, 0, 1); cppi_dump_tx(5, cppi_ch, " (done teardown)"); From 2fbcf3fa43af809ebf4e4ad33c2f0a17e903385c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 20 Mar 2011 14:16:17 +0300 Subject: [PATCH 073/200] USB: musb: silence printk format warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gcc gives the following warnings: drivers/usb/musb/cppi_dma.c: In function ‘cppi_next_tx_segment’: drivers/usb/musb/cppi_dma.c:600: warning: format ‘%x’ expects type ‘unsigned int’, but argument 8 has type ‘dma_addr_t’ drivers/usb/musb/cppi_dma.c: In function ‘cppi_next_rx_segment’: drivers/usb/musb/cppi_dma.c:822: warning: format ‘%x’ expects type ‘unsigned int’, but argument 9 has type ‘dma_addr_t’ drivers/usb/musb/cppi_dma.c: In function ‘cppi_rx_scan’: drivers/usb/musb/cppi_dma.c:1042: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 4 has type ‘dma_addr_t’ drivers/usb/musb/cppi_dma.c:1114: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 7 has type ‘dma_addr_t’ dma_addr_t is sometimes 32 bit and sometimes 64. We normally cast them to unsigned long long for printk(). Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/musb/cppi_dma.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index 0db0f5e7d3a4..eb9161ae57a4 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -597,12 +597,12 @@ cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx) length = min(n_bds * maxpacket, length); } - DBG(4, "TX DMA%d, pktSz %d %s bds %d dma 0x%x len %u\n", + DBG(4, "TX DMA%d, pktSz %d %s bds %d dma 0x%llx len %u\n", tx->index, maxpacket, rndis ? "rndis" : "transparent", n_bds, - addr, length); + (unsigned long long)addr, length); cppi_rndis_update(tx, 0, musb->ctrl_base, rndis); @@ -820,7 +820,7 @@ cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket) length = min(n_bds * maxpacket, length); DBG(4, "RX DMA%d seg, maxp %d %s bds %d (cnt %d) " - "dma 0x%x len %u %u/%u\n", + "dma 0x%llx len %u %u/%u\n", rx->index, maxpacket, onepacket ? (is_rndis ? "rndis" : "onepacket") @@ -829,7 +829,8 @@ cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket) musb_readl(tibase, DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4)) & 0xffff, - addr, length, rx->channel.actual_len, rx->buf_len); + (unsigned long long)addr, length, + rx->channel.actual_len, rx->buf_len); /* only queue one segment at a time, since the hardware prevents * correct queue shutdown after unexpected short packets @@ -1039,9 +1040,9 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch) if (!completed && (bd->hw_options & CPPI_OWN_SET)) break; - DBG(5, "C/RXBD %08x: nxt %08x buf %08x " + DBG(5, "C/RXBD %llx: nxt %08x buf %08x " "off.len %08x opt.len %08x (%d)\n", - bd->dma, bd->hw_next, bd->hw_bufp, + (unsigned long long)bd->dma, bd->hw_next, bd->hw_bufp, bd->hw_off_len, bd->hw_options, rx->channel.actual_len); @@ -1111,11 +1112,12 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch) musb_ep_select(cppi->mregs, rx->index + 1); csr = musb_readw(regs, MUSB_RXCSR); if (csr & MUSB_RXCSR_DMAENAB) { - DBG(4, "list%d %p/%p, last %08x%s, csr %04x\n", + DBG(4, "list%d %p/%p, last %llx%s, csr %04x\n", rx->index, rx->head, rx->tail, rx->last_processed - ? rx->last_processed->dma + ? (unsigned long long) + rx->last_processed->dma : 0, completed ? ", completed" : "", csr); From 2e10f5e70f670d981f789075e3ebc394f5bb51e3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 20 Mar 2011 14:18:26 +0300 Subject: [PATCH 074/200] USB: musb: dereferencing an iomem pointer "tx_ram" points to io memory. We can't dereference it directly. Sparse complains about this: "drivers/usb/musb/cppi_dma.c:1205:25: warning: dereference of noderef expression" Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/musb/cppi_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index eb9161ae57a4..ab434fbd8c35 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -1204,7 +1204,7 @@ irqreturn_t cppi_interrupt(int irq, void *dev_id) */ if (NULL == bd) { DBG(1, "null BD\n"); - tx_ram->tx_complete = 0; + musb_writel(&tx_ram->tx_complete, 0, 0); continue; } From 7a180e70cfc56e131bfe4796773df2acfc7d4180 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 22 Mar 2011 11:31:37 +0200 Subject: [PATCH 075/200] usb: musb: temporarily make it bool Due to the recent changes to musb's glue layers, we can't compile musb-hdrc as a module - compilation will break due to undefined symbol musb_debug. In order to fix that, we need a big re-work of the debug support on the MUSB driver. Because that would mean a lot of new code coming into the -rc series, it's best to defer that to next merge window and for now just disable module support for MUSB. Once we get the refactor of the debugging support done, we can simply revert this patch and things will go back to normal again. Cc: stable@kernel.org # v2.6.38 Signed-off-by: Felipe Balbi --- drivers/usb/musb/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 4cbb7e4b368d..74073b363c30 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -14,7 +14,7 @@ config USB_MUSB_HDRC select TWL4030_USB if MACH_OMAP_3430SDP select TWL6030_USB if MACH_OMAP_4430SDP || MACH_OMAP4_PANDA select USB_OTG_UTILS - tristate 'Inventra Highspeed Dual Role Controller (TI, ADI, ...)' + bool 'Inventra Highspeed Dual Role Controller (TI, ADI, ...)' help Say Y here if your system has a dual role high speed USB controller based on the Mentor Graphics silicon IP. Then @@ -30,8 +30,8 @@ config USB_MUSB_HDRC If you do not know what this is, please say N. - To compile this driver as a module, choose M here; the - module will be called "musb-hdrc". +# To compile this driver as a module, choose M here; the +# module will be called "musb-hdrc". choice prompt "Platform Glue Layer" From 3d5ad13eac320292f64071ea7ded1b661edd9430 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 22 Mar 2011 11:38:49 +0200 Subject: [PATCH 076/200] usb: musb: gadget: check the correct list_head We are now using our own list_head, so we should be checking against that, not the gadget driver's list_head. Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 9ecb057902ef..6dfbf9ffd7a6 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1296,7 +1296,7 @@ static int musb_gadget_dequeue(struct usb_ep *ep, struct usb_request *request) } /* if the hardware doesn't have the request, easy ... */ - if (musb_ep->req_list.next != &request->list || musb_ep->busy) + if (musb_ep->req_list.next != &req->list || musb_ep->busy) musb_g_giveback(musb_ep, request, -ECONNRESET); /* ... else abort the dma transfer ... */ From 4f9edd2d7e8dd170d10780532cb76eb5890468b4 Mon Sep 17 00:00:00 2001 From: Hema HK Date: Tue, 22 Mar 2011 16:02:12 +0530 Subject: [PATCH 077/200] usb: musb: Fix the crash issue during reboot Below crash observed with commit 7acc6197b76edd0b932a7cbcc6cfad0a8a87f026 (usb: musb: Idle path retention and offmode support for OMAP3) during board reboot. The musb clock was disabled when musb_shutdown() was called by platform_drv_shutdown in which there are register accesses. call pm_runtime_get_sync() and pm_runtime_put_sync() in the musb_shutdown function. / # [ 172.368774] Unhandled fault: imprecise external abort (0x1406) at 0x400f0000 [ 172.376190] Internal error: : 1406 [#1] SMP [ 172.380554] last sysfs file: /sys/devices/platform/omap/omap_i2c.4/i2c-4/i2c-dev/i2c-4/dev [ 172.389221] Modules linked in: [ 172.392456] CPU: 0 Tainted: G W (2.6.38-06671-geddecbb #33) [ 172.399475] PC is at do_raw_spin_unlock+0x50/0xc0 [ 172.404418] LR is at _raw_spin_unlock_irqrestore+0x24/0x44 [ 172.410186] pc : [] lr : [] psr: 60000093 [ 172.410186] sp : ee993e40 ip : c0d00240 fp : bea9cf14 [ 172.422241] r10: 00000000 r9 : ee992000 r8 : c04b2fa8 [ 172.427703] r7 : 00000000 r6 : c0fa46c0 r5 : ef966124 r4 : ef966124 [ 172.434539] r3 : ef92cbc0 r2 : ef92cbc0 r1 : 00000000 r0 : ef966124 [ 172.441406] Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 172.448974] Control: 10c5387d Table: ae8d804a DAC: 00000015 [ 172.454986] Process init (pid: 1094, stack limit = 0xee9922f8) [ 172.461120] Stack: (0xee993e40 to 0xee994000) [ 172.465667] 3e40: a0000013 c085a7f8 ef966124 a0000013 c0fa46c0 c0761ab4 c0761a70 ef95c008 [ 172.474273] 3e60: ef95c014 c06e2fd0 c06e2fbc c06dea90 00000000 01234567 28121969 c04fccb4 [ 172.482849] 3e80: 00000000 c04fcd04 c0a302bc c04fce44 c0a02600 00000001 00000000 c085cd04 [ 172.491424] 3ea0: 00000000 00000002 c09ea000 c085afc0 ee993f24 00000000 00040001 00000445 [ 172.499999] 3ec0: a8eb9d34 00000027 00000000 00000000 00000000 c0a56a4c 00000000 00000000 [ 172.508575] 3ee0: 00000002 60000093 00000000 c0519aac 00000002 00000080 00000000 c0550420 [ 172.517150] 3f00: 00000000 00000002 ee970000 c0a56a3c c0a56a20 00000002 c0a56a3c 00000000 [ 172.525726] 3f20: c0a56a3c 0000000a c1580e00 c0a56a20 00000002 c0a56a3c c1580e00 c0a56a20 [ 172.534301] 3f40: ef92cbc0 c05173a0 00000001 ef92cbc0 c0576190 c04e3174 20000013 c0517324 [ 172.542877] 3f60: ef815c00 ee90b720 c04e3174 c0576190 00000001 ef92cbc0 c04b2f00 ffffffff [ 172.551483] 3f80: 00000058 c0517324 00000000 00000000 ffffffff 00000000 00000000 ffffffff [ 172.560058] 3fa0: 00000058 c04b2de0 00000000 00000000 fee1dead 28121969 01234567 00000000 [ 172.568634] 3fc0: 00000000 00000000 ffffffff 00000058 00000000 00000001 400aa000 bea9cf14 [ 172.577209] 3fe0: 000ea148 bea9c958 000aa750 40225728 60000010 fee1dead 00000000 00000000 [ 172.585784] [] (do_raw_spin_unlock+0x50/0xc0) from [] (_raw_spin_unlock_irqrestore+0x24/0x44) [ 172.596588] [] (_raw_spin_unlock_irqrestore+0x24/0x44) from [] (musb_shutdown+0x44/0x88) [ 172.606933] [] (musb_shutdown+0x44/0x88) from [] (platform_drv_shutdown+0x14/0x18) [ 172.616699] [] (platform_drv_shutdown+0x14/0x18) from [] (device_shutdown+0x74/0xb4) [ 172.626647] [] (device_shutdown+0x74/0xb4) from [] (kernel_restart_prepare+0x24/0x38) [ 172.636688] [] (kernel_restart_prepare+0x24/0x38) from [] (kernel_restart+0xc/0x48) [ 172.646545] [] (kernel_restart+0xc/0x48) from [] (sys_reboot+0xfc/0x1d8) [ 172.655426] [] (sys_reboot+0xfc/0x1d8) from [] (ret_fast_syscall+0x0/0x3c) [ 172.664459] Code: e3c3303f e594200c e593300c e1520003 (0a000002) [ 172.670867] ------------[ cut here ]------------ Signed-off-by: Hema HK Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 630ae7f3cd4c..f10ff00ca09e 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1030,6 +1030,7 @@ static void musb_shutdown(struct platform_device *pdev) struct musb *musb = dev_to_musb(&pdev->dev); unsigned long flags; + pm_runtime_get_sync(musb->controller); spin_lock_irqsave(&musb->lock, flags); musb_platform_disable(musb); musb_generic_disable(musb); @@ -1040,6 +1041,7 @@ static void musb_shutdown(struct platform_device *pdev) musb_writeb(musb->mregs, MUSB_DEVCTL, 0); musb_platform_exit(musb); + pm_runtime_put(musb->controller); /* FIXME power down */ } From 132543074af3cf1e94e3608abf162880edbdcbb3 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 30 Mar 2011 22:48:54 -0400 Subject: [PATCH 078/200] USB: musb: blackfin: work around anomaly 05000450 DMA mode 1 data corruption anomaly on Blackfin systems. This issue is specific to the Blackfin silicon as the bug appears to be related to the connection of the musb ip to the bus/dma fabric. Data corruption when using USB DMA mode 1. (Issue manager 17-01-0105) DMA mode 1 allows large size transfers to generate a single interrupt at the end of the entire transfer. The transfer is split up in packets of length specified in the Maximum Packet Size field for that endpoint. If the transfer size is not an integer multiple of the Maximum Packet Size, a short packet will be present at the end of the transfer. Under certain conditions this packet may be corrupted in the USB FIFO. Workaround: Use DMA mode 1 to transfer (n* Maximum Packet Size) and schedule DMA mode 0 to transfer the short packet. As an example if your transfer size is 33168 bytes and Maximum Packet Size equals 512, schedule [33168 - (33168 mod 512)] in DMA mode 1 and the remainder (33168 mod 512) in DMA mode 0. Signed-off-by: Mike Frysinger Signed-off-by: Felipe Balbi --- drivers/usb/musb/blackfin.c | 24 ++++++++++++++++++++++++ drivers/usb/musb/musb_core.h | 5 +++++ drivers/usb/musb/musbhsdma.c | 8 ++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index 52312e8af213..8e2a1ff8a35a 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -21,6 +21,7 @@ #include #include "musb_core.h" +#include "musbhsdma.h" #include "blackfin.h" struct bfin_glue { @@ -332,6 +333,27 @@ static int bfin_musb_set_mode(struct musb *musb, u8 musb_mode) return -EIO; } +static int bfin_musb_adjust_channel_params(struct dma_channel *channel, + u16 packet_sz, u8 *mode, + dma_addr_t *dma_addr, u32 *len) +{ + struct musb_dma_channel *musb_channel = channel->private_data; + + /* + * Anomaly 05000450 might cause data corruption when using DMA + * MODE 1 transmits with short packet. So to work around this, + * we truncate all MODE 1 transfers down to a multiple of the + * max packet size, and then do the last short packet transfer + * (if there is any) using MODE 0. + */ + if (ANOMALY_05000450) { + if (musb_channel->transmit && *mode == 1) + *len = *len - (*len % packet_sz); + } + + return 0; +} + static void bfin_musb_reg_init(struct musb *musb) { if (ANOMALY_05000346) { @@ -430,6 +452,8 @@ static const struct musb_platform_ops bfin_ops = { .vbus_status = bfin_musb_vbus_status, .set_vbus = bfin_musb_set_vbus, + + .adjust_channel_params = bfin_musb_adjust_channel_params, }; static u64 bfin_dmamask = DMA_BIT_MASK(32); diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 4bd9e2145ee4..0e053b587960 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -261,6 +261,7 @@ enum musb_g_ep0_state { * @try_ilde: tries to idle the IP * @vbus_status: returns vbus status if possible * @set_vbus: forces vbus status + * @channel_program: pre check for standard dma channel_program func */ struct musb_platform_ops { int (*init)(struct musb *musb); @@ -274,6 +275,10 @@ struct musb_platform_ops { int (*vbus_status)(struct musb *musb); void (*set_vbus)(struct musb *musb, int on); + + int (*adjust_channel_params)(struct dma_channel *channel, + u16 packet_sz, u8 *mode, + dma_addr_t *dma_addr, u32 *len); }; /* diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c index 0144a2d481fd..d281792db05c 100644 --- a/drivers/usb/musb/musbhsdma.c +++ b/drivers/usb/musb/musbhsdma.c @@ -169,6 +169,14 @@ static int dma_channel_program(struct dma_channel *channel, BUG_ON(channel->status == MUSB_DMA_STATUS_UNKNOWN || channel->status == MUSB_DMA_STATUS_BUSY); + /* Let targets check/tweak the arguments */ + if (musb->ops->adjust_channel_params) { + int ret = musb->ops->adjust_channel_params(channel, + packet_sz, &mode, &dma_addr, &len); + if (ret) + return ret; + } + /* * The DMA engine in RTL1.8 and above cannot handle * DMA addresses that are not aligned to a 4 byte boundary. From 1dcffad74183bb00e8129ba1c5bb2c9931d31bd7 Mon Sep 17 00:00:00 2001 From: Maksim Rayskiy Date: Tue, 12 Apr 2011 15:14:56 -0700 Subject: [PATCH 079/200] UBIFS: fix compilation warnings when compiling with gcc 4.5 When compiling UBIFS with CONFIG_UBIFS_FS_DEBUG not set, gcc-4.5.2 generates a slew of "warning: statement with no effect" on references to non-void functions defined as 0. To avoid these warnings, replace #defines with dummy inline functions. Artem: massage the patch a bit, also remove the duplicate 'dbg_check_lprops()' prototype. Signed-off-by: Maksim Rayskiy Acked-by: Mike Frysinger Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.h | 146 +++++++++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 55 deletions(-) diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 919f0de29d8f..e6493cac193d 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -23,6 +23,12 @@ #ifndef __UBIFS_DEBUG_H__ #define __UBIFS_DEBUG_H__ +/* Checking helper functions */ +typedef int (*dbg_leaf_callback)(struct ubifs_info *c, + struct ubifs_zbranch *zbr, void *priv); +typedef int (*dbg_znode_callback)(struct ubifs_info *c, + struct ubifs_znode *znode, void *priv); + #ifdef CONFIG_UBIFS_FS_DEBUG /** @@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c); void dbg_dump_index(struct ubifs_info *c); void dbg_dump_lpt_lebs(const struct ubifs_info *c); -/* Checking helper functions */ -typedef int (*dbg_leaf_callback)(struct ubifs_info *c, - struct ubifs_zbranch *zbr, void *priv); -typedef int (*dbg_znode_callback)(struct ubifs_info *c, - struct ubifs_znode *znode, void *priv); int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); @@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); int dbg_check_filesystem(struct ubifs_info *c); void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, int add_pos); -int dbg_check_lprops(struct ubifs_info *c); int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, int row, int col); int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, @@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -#define ubifs_debugging_init(c) 0 -#define ubifs_debugging_exit(c) ({}) +static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } +static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } +static inline const char *dbg_ntype(int type) { return ""; } +static inline const char *dbg_cstate(int cmt_state) { return ""; } +static inline const char *dbg_jhead(int jhead) { return ""; } +static inline const char * +dbg_get_key_dump(const struct ubifs_info *c, + const union ubifs_key *key) { return ""; } +static inline void dbg_dump_inode(const struct ubifs_info *c, + const struct inode *inode) { return; } +static inline void dbg_dump_node(const struct ubifs_info *c, + const void *node) { return; } +static inline void dbg_dump_lpt_node(const struct ubifs_info *c, + void *node, int lnum, + int offs) { return; } +static inline void +dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } +static inline void +dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } +static inline void dbg_dump_budg(struct ubifs_info *c) { return; } +static inline void dbg_dump_lprop(const struct ubifs_info *c, + const struct ubifs_lprops *lp) { return; } +static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } +static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; } +static inline void dbg_dump_leb(const struct ubifs_info *c, + int lnum) { return; } +static inline void +dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode) { return; } +static inline void dbg_dump_heap(struct ubifs_info *c, + struct ubifs_lpt_heap *heap, + int cat) { return; } +static inline void dbg_dump_pnode(struct ubifs_info *c, + struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, + int iip) { return; } +static inline void dbg_dump_tnc(struct ubifs_info *c) { return; } +static inline void dbg_dump_index(struct ubifs_info *c) { return; } +static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; } -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_jhead(jhead) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_lpt_info(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) -#define dbg_dump_lpt_lebs(c) ({}) +static inline int dbg_walk_index(struct ubifs_info *c, + dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, + void *priv) { return 0; } +static inline void dbg_save_space_info(struct ubifs_info *c) { return; } +static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; } +static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; } +static inline int +dbg_old_index_check_init(struct ubifs_info *c, + struct ubifs_zbranch *zroot) { return 0; } +static inline int +dbg_check_old_index(struct ubifs_info *c, + struct ubifs_zbranch *zroot) { return 0; } +static inline int dbg_check_cats(struct ubifs_info *c) { return 0; } +static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } +static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } +static inline int dbg_chk_lpt_sz(struct ubifs_info *c, + int action, int len) { return 0; } +static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } +static inline int dbg_check_dir_size(struct ubifs_info *c, + const struct inode *dir) { return 0; } +static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } +static inline int dbg_check_idx_size(struct ubifs_info *c, + long long idx_size) { return 0; } +static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; } +static inline void dbg_check_heap(struct ubifs_info *c, + struct ubifs_lpt_heap *heap, + int cat, int add_pos) { return; } +static inline int dbg_check_lpt_nodes(struct ubifs_info *c, + struct ubifs_cnode *cnode, int row, int col) { return 0; } +static inline int dbg_check_inode_size(struct ubifs_info *c, + const struct inode *inode, + loff_t size) { return 0; } +static inline int +dbg_check_data_nodes_order(struct ubifs_info *c, + struct list_head *head) { return 0; } +static inline int +dbg_check_nondata_nodes_order(struct ubifs_info *c, + struct list_head *head) { return 0; } -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 -#define dbg_old_index_check_init(c, zroot) 0 -#define dbg_save_space_info(c) ({}) -#define dbg_check_space_info(c) 0 -#define dbg_check_old_index(c, zroot) 0 -#define dbg_check_cats(c) 0 -#define dbg_check_ltab(c) 0 -#define dbg_chk_lpt_free_spc(c) 0 -#define dbg_chk_lpt_sz(c, action, len) 0 -#define dbg_check_synced_i_size(inode) 0 -#define dbg_check_dir_size(c, dir) 0 -#define dbg_check_tnc(c, x) 0 -#define dbg_check_idx_size(c, idx_size) 0 -#define dbg_check_filesystem(c) 0 -#define dbg_check_heap(c, heap, cat, add_pos) ({}) -#define dbg_check_lprops(c) 0 -#define dbg_check_lpt_nodes(c, cnode, row, col) 0 -#define dbg_check_inode_size(c, inode, size) 0 -#define dbg_check_data_nodes_order(c, head) 0 -#define dbg_check_nondata_nodes_order(c, head) 0 -#define dbg_force_in_the_gaps_enabled 0 -#define dbg_force_in_the_gaps() 0 -#define dbg_failure_mode 0 +static inline int dbg_force_in_the_gaps(void) { return 0; } +#define dbg_force_in_the_gaps_enabled 0 +#define dbg_failure_mode 0 -#define dbg_debugfs_init() 0 -#define dbg_debugfs_exit() -#define dbg_debugfs_init_fs(c) 0 -#define dbg_debugfs_exit_fs(c) 0 +static inline int dbg_debugfs_init(void) { return 0; } +static inline void dbg_debugfs_exit(void) { return; } +static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; } +static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; } #endif /* !CONFIG_UBIFS_FS_DEBUG */ #endif /* !__UBIFS_DEBUG_H__ */ From 80656b67b3988f83edd86a280d9937124fe62050 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Wed, 13 Apr 2011 22:14:54 +0200 Subject: [PATCH 080/200] block, blk-sysfs: Use the variable directly instead of a function call In the function blk_register_queue(), var _dev_ is already assigned by disk_to_dev().So use it directly instead of calling disk_to_dev() again. Signed-off-by: Liu Yuan Modified by me to delete an empty line in the same function while in there anyway. Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 261c75c665ae..6d735122bc59 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -498,7 +498,6 @@ int blk_register_queue(struct gendisk *disk) { int ret; struct device *dev = disk_to_dev(disk); - struct request_queue *q = disk->queue; if (WARN_ON(!q)) @@ -521,7 +520,7 @@ int blk_register_queue(struct gendisk *disk) if (ret) { kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - blk_trace_remove_sysfs(disk_to_dev(disk)); + blk_trace_remove_sysfs(dev); kobject_put(&dev->kobj); return ret; } From d06847fec256f4f902075ce5986e10f7c55fa250 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 15 Mar 2011 17:09:10 -0700 Subject: [PATCH 081/200] usb: fix ips1760-hcd printk format warning Fix printk format build warning and grammar typo on same line. drivers/usb/host/isp1760-hcd.c:300: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'size_t' Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp1760-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c index f50e84ac570a..795345ad45e6 100644 --- a/drivers/usb/host/isp1760-hcd.c +++ b/drivers/usb/host/isp1760-hcd.c @@ -295,7 +295,7 @@ static void alloc_mem(struct usb_hcd *hcd, struct isp1760_qtd *qtd) } dev_err(hcd->self.controller, - "%s: Can not allocate %lu bytes of memory\n" + "%s: Cannot allocate %zu bytes of memory\n" "Current memory map:\n", __func__, qtd->length); for (i = 0; i < BLOCKS; i++) { From 00cc7a5faf25b3ba5cf30fcffc62249bdd152006 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 16 Mar 2011 21:54:05 +0200 Subject: [PATCH 082/200] USB: usb-gadget: unlock data->lock mutex on error path in ep_read() ep_read() acquires data->lock mutex in get_ready_ep() and releases it on all paths except for one: when usb_endpoint_xfer_isoc() failed. The patch adds mutex_unlock(&data->lock) at that path. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 3ed73f49cf18..a01383f71f38 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -386,8 +386,10 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) /* halt any endpoint by doing a "wrong direction" i/o call */ if (usb_endpoint_dir_in(&data->desc)) { - if (usb_endpoint_xfer_isoc(&data->desc)) + if (usb_endpoint_xfer_isoc(&data->desc)) { + mutex_unlock(&data->lock); return -EINVAL; + } DBG (data->dev, "%s halt\n", data->name); spin_lock_irq (&data->dev->lock); if (likely (data->ep != NULL)) From 0291303d37585ab9056bae9fac1d1e13ae1ce913 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 4 Apr 2011 11:35:05 +0200 Subject: [PATCH 083/200] usb: Fix Kconfig unmet dependencies for Microblaze EHCI Disable USB_ARCH_HAS_EHCI in arch Kconfig and enable it in usb Kconfig Warning log: warning: (MICROBLAZE) selects USB_ARCH_HAS_EHCI which has unmet direct dependencies (USB_SUPPORT) Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- arch/microblaze/Kconfig | 1 - drivers/usb/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 851b3bf6e962..eccdefe70d4e 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -6,7 +6,6 @@ config MICROBLAZE select HAVE_FUNCTION_GRAPH_TRACER select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD - select USB_ARCH_HAS_EHCI select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_OPROFILE select HAVE_ARCH_KGDB diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 41b6e51188e4..006489d82dc3 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -66,6 +66,7 @@ config USB_ARCH_HAS_EHCI default y if ARCH_VT8500 default y if PLAT_SPEAR default y if ARCH_MSM + default y if MICROBLAZE default PCI # ARM SA1111 chips have a non-PCI based "OHCI-compatible" USB host interface. From 2c2da1799ba776c4bd8d51ee46d9f00cb4cc6120 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 4 Apr 2011 13:09:22 +0900 Subject: [PATCH 084/200] usb: r8a66597-udc: fix spinlock usage Because the disconnect function in the composite driver will call spin_lock, this driver has to call spin_unlock before calling driver->disconnet(). Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/r8a66597-udc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c index 015118535f77..6dcc1f68fa60 100644 --- a/drivers/usb/gadget/r8a66597-udc.c +++ b/drivers/usb/gadget/r8a66597-udc.c @@ -1083,7 +1083,9 @@ static void irq_device_state(struct r8a66597 *r8a66597) if (dvsq == DS_DFLT) { /* bus reset */ + spin_unlock(&r8a66597->lock); r8a66597->driver->disconnect(&r8a66597->gadget); + spin_lock(&r8a66597->lock); r8a66597_update_usb_speed(r8a66597); } if (r8a66597->old_dvsq == DS_CNFG && dvsq != DS_CNFG) From 5a9443f08c83c294c5c806a689c1184b27cb26b3 Mon Sep 17 00:00:00 2001 From: Christian Simon Date: Mon, 28 Mar 2011 21:54:47 +0200 Subject: [PATCH 085/200] USB: ftdi_sio: Added IDs for CTI USB Serial Devices I added new ProdutIds for two devices from CTI GmbH Leipzig. Signed-off-by: Christian Simon Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index a973c7a29d6e..8a02407bc41f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -151,6 +151,8 @@ static struct ftdi_sio_quirk ftdi_stmclite_quirk = { * /sys/bus/usb/ftdi_sio/new_id, then send patch/report! */ static struct usb_device_id id_table_combined [] = { + { USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index c543e55bafba..770b5dd7114d 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1141,3 +1141,12 @@ #define QIHARDWARE_VID 0x20B7 #define MILKYMISTONE_JTAGSERIAL_PID 0x0713 +/* + * CTI GmbH RS485 Converter http://www.cti-lean.com/ + */ +/* USB-485-Mini*/ +#define FTDI_CTI_MINI_PID 0xF608 +/* USB-Nano-485*/ +#define FTDI_CTI_NANO_PID 0xF60B + + From 505d1f69ec4f8697a74711fb3a01ed151fda3834 Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Tue, 5 Apr 2011 16:55:25 +0300 Subject: [PATCH 086/200] usb: gadget: eem: fix echo command processing During processing of bunch of eem frames if "echo" command is found skb is cloned and the cloned version should be used to send reply. Unfortunately, the data of the original skb were actually used and the cloned skb is never freed. Using the cloned skb and freeing the skb in the completion callback for usb request. Signed-off-by: Yauheni Kaliuta Reviewed-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/f_eem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c index 95dd4662d6a8..b3c304290150 100644 --- a/drivers/usb/gadget/f_eem.c +++ b/drivers/usb/gadget/f_eem.c @@ -314,6 +314,9 @@ eem_unbind(struct usb_configuration *c, struct usb_function *f) static void eem_cmd_complete(struct usb_ep *ep, struct usb_request *req) { + struct sk_buff *skb = (struct sk_buff *)req->context; + + dev_kfree_skb_any(skb); } /* @@ -428,10 +431,11 @@ static int eem_unwrap(struct gether *port, skb_trim(skb2, len); put_unaligned_le16(BIT(15) | BIT(11) | len, skb_push(skb2, 2)); - skb_copy_bits(skb, 0, req->buf, skb->len); - req->length = skb->len; + skb_copy_bits(skb2, 0, req->buf, skb2->len); + req->length = skb2->len; req->complete = eem_cmd_complete; req->zero = 1; + req->context = skb2; if (usb_ep_queue(port->in_ep, req, GFP_ATOMIC)) DBG(cdev, "echo response queue fail\n"); break; From 94ae4976e253757e9b03a44d27d41b20f1829d80 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 5 Apr 2011 13:36:15 -0400 Subject: [PATCH 087/200] USB: EHCI: unlink unused QHs when the controller is stopped This patch (as1458) fixes a problem affecting ultra-reliable systems: When hardware failover of an EHCI controller occurs, the data structures do not get released correctly. This is because the routine responsible for removing unused QHs from the async schedule assumes the controller is running properly (the frame counter is used in determining how long the QH has been idle) -- but when a failover causes the controller to be electronically disconnected from the PCI bus, obviously it stops running. The solution is simple: Allow scan_async() to remove a QH from the async schedule if it has been idle for long enough _or_ if the controller is stopped. Signed-off-by: Alan Stern Reported-and-Tested-by: Dan Duval CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-q.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 98ded66e8d3f..42abd0f603bf 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -1247,24 +1247,27 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh) static void scan_async (struct ehci_hcd *ehci) { + bool stopped; struct ehci_qh *qh; enum ehci_timer_action action = TIMER_IO_WATCHDOG; ehci->stamp = ehci_readl(ehci, &ehci->regs->frame_index); timer_action_done (ehci, TIMER_ASYNC_SHRINK); rescan: + stopped = !HC_IS_RUNNING(ehci_to_hcd(ehci)->state); qh = ehci->async->qh_next.qh; if (likely (qh != NULL)) { do { /* clean any finished work for this qh */ - if (!list_empty (&qh->qtd_list) - && qh->stamp != ehci->stamp) { + if (!list_empty(&qh->qtd_list) && (stopped || + qh->stamp != ehci->stamp)) { int temp; /* unlinks could happen here; completion * reporting drops the lock. rescan using * the latest schedule, but don't rescan - * qhs we already finished (no looping). + * qhs we already finished (no looping) + * unless the controller is stopped. */ qh = qh_get (qh); qh->stamp = ehci->stamp; @@ -1285,9 +1288,9 @@ rescan: */ if (list_empty(&qh->qtd_list) && qh->qh_state == QH_STATE_LINKED) { - if (!ehci->reclaim - && ((ehci->stamp - qh->stamp) & 0x1fff) - >= (EHCI_SHRINK_FRAMES * 8)) + if (!ehci->reclaim && (stopped || + ((ehci->stamp - qh->stamp) & 0x1fff) + >= EHCI_SHRINK_FRAMES * 8)) start_unlink_async(ehci, qh); else action = TIMER_ASYNC_SHRINK; From 36a52c009a39049893c9a3a4091f8f8f48585f47 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 5 Apr 2011 19:50:34 +0200 Subject: [PATCH 088/200] usb: musb: omap2430: fix build failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build failure introduced by commit 7acc6197b76edd0b932a7cbcc6cfad0a8a87f026 (usb: musb: Idle path retention and offmode support for OMAP3) when building without gadget support. CC drivers/usb/musb/omap2430.o drivers/usb/musb/omap2430.c: In function ‘musb_otg_notifications’: drivers/usb/musb/omap2430.c:262: error: ‘struct musb’ has no member named ‘gadget_driver’ Signed-off-by: Johan Hovold Cc: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 25cb8b0003b1..57a27fa954b4 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -259,9 +259,10 @@ static int musb_otg_notifications(struct notifier_block *nb, case USB_EVENT_VBUS: DBG(4, "VBUS Connect\n"); +#ifdef CONFIG_USB_GADGET_MUSB_HDRC if (musb->gadget_driver) pm_runtime_get_sync(musb->controller); - +#endif otg_init(musb->xceiv); break; From 11a31d84129dc3133417d626643d714c9df5317e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 Apr 2011 17:38:22 +0200 Subject: [PATCH 089/200] USB: ftdi_sio: add PID for OCT DK201 docking station Add PID 0x0103 for serial port of the OCT DK201 docking station. Reported-by: Jan Hoogenraad Signed-off-by: Johan Hovold Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8a02407bc41f..51b780134f59 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -527,6 +527,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_8_PID) }, { USB_DEVICE(IDTECH_VID, IDTECH_IDT1221U_PID) }, { USB_DEVICE(OCT_VID, OCT_US101_PID) }, + { USB_DEVICE(OCT_VID, OCT_DK201_PID) }, { USB_DEVICE(FTDI_VID, FTDI_HE_TIRA1_PID), .driver_info = (kernel_ulong_t)&ftdi_HE_TIRA1_quirk }, { USB_DEVICE(FTDI_VID, FTDI_USB_UIRT_PID), diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 770b5dd7114d..0bdba7a96293 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -572,6 +572,7 @@ /* Note: OCT US101 is also rebadged as Dick Smith Electronics (NZ) XH6381 */ /* Also rebadged as Dick Smith Electronics (Aus) XH6451 */ /* Also rebadged as SIIG Inc. model US2308 hardware version 1 */ +#define OCT_DK201_PID 0x0103 /* OCT DK201 USB docking station */ #define OCT_US101_PID 0x0421 /* OCT US101 USB to RS-232 */ /* From 0fdf65c3394358a3634ad1064ea4984a03a6aa07 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Mon, 11 Apr 2011 21:56:39 +0900 Subject: [PATCH 090/200] USB: ohci-au1xxx: fix warning "__BIG_ENDIAN" is not defined In file included from drivers/usb/host/ohci-hcd.c:1028:0: drivers/usb/host/ohci-au1xxx.c:36:7: warning: "__BIG_ENDIAN" is not defined Signed-off-by: Yoichi Yuasa Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-au1xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c index 17a6043c1fa0..958d985f2951 100644 --- a/drivers/usb/host/ohci-au1xxx.c +++ b/drivers/usb/host/ohci-au1xxx.c @@ -33,7 +33,7 @@ #ifdef __LITTLE_ENDIAN #define USBH_ENABLE_INIT (USBH_ENABLE_CE | USBH_ENABLE_E | USBH_ENABLE_C) -#elif __BIG_ENDIAN +#elif defined(__BIG_ENDIAN) #define USBH_ENABLE_INIT (USBH_ENABLE_CE | USBH_ENABLE_E | USBH_ENABLE_C | \ USBH_ENABLE_BE) #else From 16a2f970f3b4beb8d21009b2c45b9b5ab56bb621 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Apr 2011 20:44:30 +0200 Subject: [PATCH 091/200] usb/gadget: don't leak hs_descriptors We should free both descriptors. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/f_audio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c index 9abecfddb27d..0111f8a9cf7f 100644 --- a/drivers/usb/gadget/f_audio.c +++ b/drivers/usb/gadget/f_audio.c @@ -706,6 +706,7 @@ f_audio_unbind(struct usb_configuration *c, struct usb_function *f) struct f_audio *audio = func_to_audio(f); usb_free_descriptors(f->descriptors); + usb_free_descriptors(f->hs_descriptors); kfree(audio); } From 9ab7927bb845cf2549110b19c212fe44a2bfbacb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 13 Apr 2011 08:38:16 +0200 Subject: [PATCH 092/200] USB host: Fix lockdep warning in AMD PLL quirk Booting latest kernel on my test machine produces a lockdep warning from the usb_amd_find_chipset_info() function: WARNING: at /data/lemmy/linux.trees.git/kernel/lockdep.c:2465 lockdep_trace_alloc+0x95/0xc2() Hardware name: Snook Modules linked in: Pid: 959, comm: work_for_cpu Not tainted 2.6.39-rc2+ #22 Call Trace: [] warn_slowpath_common+0x80/0x98 [] ? T.492+0x24/0x26 [] warn_slowpath_null+0x15/0x17 [] lockdep_trace_alloc+0x95/0xc2 [] slab_pre_alloc_hook+0x18/0x3b [] kmem_cache_alloc_trace+0x25/0xba [] T.492+0x24/0x26 [] pci_get_subsys+0x2e/0x73 [] pci_get_device+0x11/0x13 [] usb_amd_find_chipset_info+0x3f/0x18a ... It turns out that this function calls pci_get_device under a spin_lock with irqs disabled, but the pci_get_device function is only allowed in preemptible context. This patch fixes the warning by making all data-structure modifications on temporal storage and commiting this back into the visible structure at the end. While at it, this patch also moves the pci_dev_put calls out of the spinlocks because this function might sleep too. Signed-off-by: Joerg Roedel Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 117 +++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 43 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 1d586d4f7b56..9b166d70ae91 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -84,65 +84,92 @@ int usb_amd_find_chipset_info(void) { u8 rev = 0; unsigned long flags; + struct amd_chipset_info info; + int ret; spin_lock_irqsave(&amd_lock, flags); - amd_chipset.probe_count++; /* probe only once */ - if (amd_chipset.probe_count > 1) { + if (amd_chipset.probe_count > 0) { + amd_chipset.probe_count++; spin_unlock_irqrestore(&amd_lock, flags); return amd_chipset.probe_result; } + memset(&info, 0, sizeof(info)); + spin_unlock_irqrestore(&amd_lock, flags); - amd_chipset.smbus_dev = pci_get_device(PCI_VENDOR_ID_ATI, 0x4385, NULL); - if (amd_chipset.smbus_dev) { - rev = amd_chipset.smbus_dev->revision; + info.smbus_dev = pci_get_device(PCI_VENDOR_ID_ATI, 0x4385, NULL); + if (info.smbus_dev) { + rev = info.smbus_dev->revision; if (rev >= 0x40) - amd_chipset.sb_type = 1; + info.sb_type = 1; else if (rev >= 0x30 && rev <= 0x3b) - amd_chipset.sb_type = 3; + info.sb_type = 3; } else { - amd_chipset.smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, - 0x780b, NULL); - if (!amd_chipset.smbus_dev) { - spin_unlock_irqrestore(&amd_lock, flags); - return 0; + info.smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, + 0x780b, NULL); + if (!info.smbus_dev) { + ret = 0; + goto commit; } - rev = amd_chipset.smbus_dev->revision; + + rev = info.smbus_dev->revision; if (rev >= 0x11 && rev <= 0x18) - amd_chipset.sb_type = 2; + info.sb_type = 2; } - if (amd_chipset.sb_type == 0) { - if (amd_chipset.smbus_dev) { - pci_dev_put(amd_chipset.smbus_dev); - amd_chipset.smbus_dev = NULL; + if (info.sb_type == 0) { + if (info.smbus_dev) { + pci_dev_put(info.smbus_dev); + info.smbus_dev = NULL; } - spin_unlock_irqrestore(&amd_lock, flags); - return 0; + ret = 0; + goto commit; } - amd_chipset.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x9601, NULL); - if (amd_chipset.nb_dev) { - amd_chipset.nb_type = 1; + info.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x9601, NULL); + if (info.nb_dev) { + info.nb_type = 1; } else { - amd_chipset.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - 0x1510, NULL); - if (amd_chipset.nb_dev) { - amd_chipset.nb_type = 2; - } else { - amd_chipset.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - 0x9600, NULL); - if (amd_chipset.nb_dev) - amd_chipset.nb_type = 3; + info.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1510, NULL); + if (info.nb_dev) { + info.nb_type = 2; + } else { + info.nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, + 0x9600, NULL); + if (info.nb_dev) + info.nb_type = 3; } } - amd_chipset.probe_result = 1; + ret = info.probe_result = 1; printk(KERN_DEBUG "QUIRK: Enable AMD PLL fix\n"); - spin_unlock_irqrestore(&amd_lock, flags); - return amd_chipset.probe_result; +commit: + + spin_lock_irqsave(&amd_lock, flags); + if (amd_chipset.probe_count > 0) { + /* race - someone else was faster - drop devices */ + + /* Mark that we where here */ + amd_chipset.probe_count++; + ret = amd_chipset.probe_result; + + spin_unlock_irqrestore(&amd_lock, flags); + + if (info.nb_dev) + pci_dev_put(info.nb_dev); + if (info.smbus_dev) + pci_dev_put(info.smbus_dev); + + } else { + /* no race - commit the result */ + info.probe_count++; + amd_chipset = info; + spin_unlock_irqrestore(&amd_lock, flags); + } + + return ret; } EXPORT_SYMBOL_GPL(usb_amd_find_chipset_info); @@ -284,6 +311,7 @@ EXPORT_SYMBOL_GPL(usb_amd_quirk_pll_enable); void usb_amd_dev_put(void) { + struct pci_dev *nb, *smbus; unsigned long flags; spin_lock_irqsave(&amd_lock, flags); @@ -294,20 +322,23 @@ void usb_amd_dev_put(void) return; } - if (amd_chipset.nb_dev) { - pci_dev_put(amd_chipset.nb_dev); - amd_chipset.nb_dev = NULL; - } - if (amd_chipset.smbus_dev) { - pci_dev_put(amd_chipset.smbus_dev); - amd_chipset.smbus_dev = NULL; - } + /* save them to pci_dev_put outside of spinlock */ + nb = amd_chipset.nb_dev; + smbus = amd_chipset.smbus_dev; + + amd_chipset.nb_dev = NULL; + amd_chipset.smbus_dev = NULL; amd_chipset.nb_type = 0; amd_chipset.sb_type = 0; amd_chipset.isoc_reqs = 0; amd_chipset.probe_result = 0; spin_unlock_irqrestore(&amd_lock, flags); + + if (nb) + pci_dev_put(nb); + if (smbus) + pci_dev_put(smbus); } EXPORT_SYMBOL_GPL(usb_amd_dev_put); From 485707116b3949ab41e1aba8d74f45a7551e3a65 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 20 Mar 2011 14:09:50 +0300 Subject: [PATCH 093/200] usb: pch_udc: unlock on allocation failure There was an unlock missing on the error path. Also I did a small cleanup by changing ep->dev->lock for just dev->lock. They're the same lock, but dev->lock is shorter and that's how it is used for the spin_unlock_irqrestore() call. Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/pch_udc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c index 3e4b35e50c24..68dbcc3e4cc2 100644 --- a/drivers/usb/gadget/pch_udc.c +++ b/drivers/usb/gadget/pch_udc.c @@ -1608,7 +1608,7 @@ static int pch_udc_pcd_queue(struct usb_ep *usbep, struct usb_request *usbreq, return -EINVAL; if (!dev->driver || (dev->gadget.speed == USB_SPEED_UNKNOWN)) return -ESHUTDOWN; - spin_lock_irqsave(&ep->dev->lock, iflags); + spin_lock_irqsave(&dev->lock, iflags); /* map the buffer for dma */ if (usbreq->length && ((usbreq->dma == DMA_ADDR_INVALID) || !usbreq->dma)) { @@ -1625,8 +1625,10 @@ static int pch_udc_pcd_queue(struct usb_ep *usbep, struct usb_request *usbreq, DMA_FROM_DEVICE); } else { req->buf = kzalloc(usbreq->length, GFP_ATOMIC); - if (!req->buf) - return -ENOMEM; + if (!req->buf) { + retval = -ENOMEM; + goto probe_end; + } if (ep->in) { memcpy(req->buf, usbreq->buf, usbreq->length); req->dma = dma_map_single(&dev->pdev->dev, From 80f9df3e0093ad9f1eeefd2ff7fd27daaa518d25 Mon Sep 17 00:00:00 2001 From: "Marius B. Kotsbak" Date: Tue, 22 Mar 2011 00:01:53 +0100 Subject: [PATCH 094/200] USB: option: Added support for Samsung GT-B3730/GT-B3710 LTE USB modem. Bind only modem AT command endpoint to option. Signed-off-by: Marius B. Kotsbak Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 75c7f456eed5..d77ff0435896 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -407,6 +407,10 @@ static void option_instat_callback(struct urb *urb); /* ONDA MT825UP HSDPA 14.2 modem */ #define ONDA_MT825UP 0x000b +/* Samsung products */ +#define SAMSUNG_VENDOR_ID 0x04e8 +#define SAMSUNG_PRODUCT_GT_B3730 0x6889 + /* some devices interfaces need special handling due to a number of reasons */ enum option_blacklist_reason { OPTION_BLACKLIST_NONE = 0, @@ -968,6 +972,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ + { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730/GT-B3710 LTE USB modem.*/ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From c53c2fab40cf16e13af66f40bfd27200cda98d2f Mon Sep 17 00:00:00 2001 From: Paul Friedrich Date: Fri, 18 Mar 2011 11:13:55 +0100 Subject: [PATCH 095/200] USB: ftdi_sio: add ids for Hameg HO720 and HO730 usb serial: ftdi_sio: add two missing USB ID's for Hameg interfaces HO720 and HO730 Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 51b780134f59..4de6ef0ae52a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -790,6 +790,8 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) }, + { USB_DEVICE(FTDI_VID, HAMEG_HO720_PID) }, + { USB_DEVICE(FTDI_VID, HAMEG_HO730_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) }, { USB_DEVICE(FTDI_VID, MJSG_GENERIC_PID) }, { USB_DEVICE(FTDI_VID, MJSG_SR_RADIO_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 0bdba7a96293..efffc23723bd 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -300,6 +300,8 @@ * Hameg HO820 and HO870 interface (using VID 0x0403) */ #define HAMEG_HO820_PID 0xed74 +#define HAMEG_HO730_PID 0xed73 +#define HAMEG_HO720_PID 0xed72 #define HAMEG_HO870_PID 0xed71 /* From 10c9ab15d6aee153968d150c05b3ee3df89673de Mon Sep 17 00:00:00 2001 From: Steven Hardy Date: Mon, 4 Apr 2011 17:57:37 +0100 Subject: [PATCH 096/200] usb: Fix qcserial memory leak on rmmod qcprobe function allocates serial->private but this is never freed, this patch adds a new function qc_release() which frees serial->private, after calling usb_wwan_release Signed-off-by: Steven Hardy Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 8858201eb1d3..6e3b933457f4 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -205,6 +205,18 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) return retval; } +static void qc_release(struct usb_serial *serial) +{ + struct usb_wwan_intf_private *priv = usb_get_serial_data(serial); + + dbg("%s", __func__); + + /* Call usb_wwan release & free the private data allocated in qcprobe */ + usb_wwan_release(serial); + usb_set_serial_data(serial, NULL); + kfree(priv); +} + static struct usb_serial_driver qcdevice = { .driver = { .owner = THIS_MODULE, @@ -222,7 +234,7 @@ static struct usb_serial_driver qcdevice = { .chars_in_buffer = usb_wwan_chars_in_buffer, .attach = usb_wwan_startup, .disconnect = usb_wwan_disconnect, - .release = usb_wwan_release, + .release = qc_release, #ifdef CONFIG_PM .suspend = usb_wwan_suspend, .resume = usb_wwan_resume, From 99ab3f9e4eaec35fd2d7159c31b71f17f7e613e3 Mon Sep 17 00:00:00 2001 From: Steven Hardy Date: Mon, 4 Apr 2011 17:59:55 +0100 Subject: [PATCH 097/200] usb: qcserial avoid pointing to freed memory Rework the qcprobe logic such that serial->private is not set when qcprobe exits with -ENODEV, otherwise serial->private will point to freed memory on -ENODEV Signed-off-by: Steven Hardy Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 6e3b933457f4..cd638648479a 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -111,7 +111,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) ifnum = intf->desc.bInterfaceNumber; dbg("This Interface = %d", ifnum); - data = serial->private = kzalloc(sizeof(struct usb_wwan_intf_private), + data = kzalloc(sizeof(struct usb_wwan_intf_private), GFP_KERNEL); if (!data) return -ENOMEM; @@ -134,8 +134,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) usb_endpoint_is_bulk_out(&intf->endpoint[1].desc)) { dbg("QDL port found"); - if (serial->interface->num_altsetting == 1) - return 0; + if (serial->interface->num_altsetting == 1) { + retval = 0; /* Success */ + break; + } retval = usb_set_interface(serial->dev, ifnum, 1); if (retval < 0) { @@ -145,7 +147,6 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) retval = -ENODEV; kfree(data); } - return retval; } break; @@ -177,7 +178,6 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) retval = -ENODEV; kfree(data); } - return retval; } else if (ifnum==3) { /* * NMEA (serial line 9600 8N1) @@ -199,9 +199,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) dev_err(&serial->dev->dev, "unknown number of interfaces: %d\n", nintf); kfree(data); - return -ENODEV; + retval = -ENODEV; } + /* Set serial->private if not returning -ENODEV */ + if (retval != -ENODEV) + usb_set_serial_data(serial, data); return retval; } From cb62d65f966146a39fdde548cb474dacf1d00fa5 Mon Sep 17 00:00:00 2001 From: Steven Hardy Date: Mon, 4 Apr 2011 18:02:25 +0100 Subject: [PATCH 098/200] usb: qcserial add missing errorpath kfrees There are two -ENODEV error paths in qcprobe where the allocated private data is not freed, this patch adds the two missing kfrees to avoid leaking memory on the error path Signed-off-by: Steven Hardy Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index cd638648479a..54a9dab1f33b 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -167,6 +167,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) "Could not set interface, error %d\n", retval); retval = -ENODEV; + kfree(data); } } else if (ifnum == 2) { dbg("Modem port found"); @@ -191,6 +192,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) "Could not set interface, error %d\n", retval); retval = -ENODEV; + kfree(data); } } break; From d834508e159fe8936f9e7fd941b1e2fe9a209d4b Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Wed, 23 Mar 2011 17:47:00 +0100 Subject: [PATCH 099/200] USB: fsl_qe_udc: send ZLP when zero flag and length % maxpacket == 0 The driver did not take the zero flag in the USB request. If the request length is the same as the endpoint's maxpacket, an additional ZLP with no data has to be transmitted. The method used here is inspired to what is done in fsl_udc_core.c (and pxa27x_udc.c and at91_udc.c) where this is supported. There already was a discussion about this topic with people from Keymile, and I propose here a better implementation: http://thread.gmane.org/gmane.linux.usb.general/38951 Signed-off-by: Valentin Longchamp Acked-by: Li Yang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/fsl_qe_udc.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index aee7e3c53c38..36613b37c504 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -1148,6 +1148,12 @@ static int qe_ep_tx(struct qe_ep *ep, struct qe_frame *frame) static int txcomplete(struct qe_ep *ep, unsigned char restart) { if (ep->tx_req != NULL) { + struct qe_req *req = ep->tx_req; + unsigned zlp = 0, last_len = 0; + + last_len = min_t(unsigned, req->req.length - ep->sent, + ep->ep.maxpacket); + if (!restart) { int asent = ep->last; ep->sent += asent; @@ -1156,9 +1162,18 @@ static int txcomplete(struct qe_ep *ep, unsigned char restart) ep->last = 0; } + /* zlp needed when req->re.zero is set */ + if (req->req.zero) { + if (last_len == 0 || + (req->req.length % ep->ep.maxpacket) != 0) + zlp = 0; + else + zlp = 1; + } else + zlp = 0; + /* a request already were transmitted completely */ - if ((ep->tx_req->req.length - ep->sent) <= 0) { - ep->tx_req->req.actual = (unsigned int)ep->sent; + if (((ep->tx_req->req.length - ep->sent) <= 0) && !zlp) { done(ep, ep->tx_req, 0); ep->tx_req = NULL; ep->last = 0; @@ -1191,6 +1206,7 @@ static int qe_usb_senddata(struct qe_ep *ep, struct qe_frame *frame) buf = (u8 *)ep->tx_req->req.buf + ep->sent; if (buf && size) { ep->last = size; + ep->tx_req->req.actual += size; frame_set_data(frame, buf); frame_set_length(frame, size); frame_set_status(frame, FRAME_OK); From 5808544690300071f09eef9ab83a0fb1f60cf1cd Mon Sep 17 00:00:00 2001 From: Richard Retanubun Date: Thu, 17 Mar 2011 17:39:28 -0400 Subject: [PATCH 100/200] USB: isp1760-hcd: move imask clear after pending work is done This patch moves the HcInterrupt register write to clear the pending interrupt to after the isr work is done, doing this removes glitches in the irq line. Signed-off-by: Richard Retanubun Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp1760-hcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c index 795345ad45e6..564b03337e7e 100644 --- a/drivers/usb/host/isp1760-hcd.c +++ b/drivers/usb/host/isp1760-hcd.c @@ -1676,13 +1676,15 @@ static irqreturn_t isp1760_irq(struct usb_hcd *hcd) if (unlikely(!imask)) goto leave; - reg_write32(hcd->regs, HC_INTERRUPT_REG, imask); if (imask & (HC_ATL_INT | HC_SOT_INT)) do_atl_int(hcd); if (imask & HC_INTL_INT) do_intl_int(hcd); + /* Clear interrupt mask on device after the work is done */ + reg_write32(hcd->regs, HC_INTERRUPT_REG, imask); + irqret = IRQ_HANDLED; leave: spin_unlock(&priv->lock); From 2868a2b1ba8f9c7f6c4170519ebb6c62934df70e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Mar 2011 21:29:01 -0700 Subject: [PATCH 101/200] USB: fix formatting of SuperSpeed endpoints in /proc/bus/usb/devices Isochronous and interrupt SuperSpeed endpoints use the same mechanisms for decoding bInterval values as HighSpeed ones so adjust the code accordingly. Also bandwidth reservation for SuperSpeed matches highspeed, not low/full speed. Signed-off-by: Dmitry Torokhov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devices.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index a3d2e2399655..96fdfb815f89 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -221,7 +221,7 @@ static char *usb_dump_endpoint_descriptor(int speed, char *start, char *end, break; case USB_ENDPOINT_XFER_INT: type = "Int."; - if (speed == USB_SPEED_HIGH) + if (speed == USB_SPEED_HIGH || speed == USB_SPEED_SUPER) interval = 1 << (desc->bInterval - 1); else interval = desc->bInterval; @@ -229,7 +229,8 @@ static char *usb_dump_endpoint_descriptor(int speed, char *start, char *end, default: /* "can't happen" */ return start; } - interval *= (speed == USB_SPEED_HIGH) ? 125 : 1000; + interval *= (speed == USB_SPEED_HIGH || + speed == USB_SPEED_SUPER) ? 125 : 1000; if (interval % 1000) unit = 'u'; else { @@ -542,8 +543,9 @@ static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes, if (level == 0) { int max; - /* high speed reserves 80%, full/low reserves 90% */ - if (usbdev->speed == USB_SPEED_HIGH) + /* super/high speed reserves 80%, full/low reserves 90% */ + if (usbdev->speed == USB_SPEED_HIGH || + usbdev->speed == USB_SPEED_SUPER) max = 800; else max = FRAME_TIME_MAX_USECS_ALLOC; From 5a6c2f3ff039154872ce597952f8b8900ea0d732 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 20 Mar 2011 02:15:17 -0700 Subject: [PATCH 102/200] USB: xhci - fix unsafe macro definitions Macro arguments used in expressions need to be enclosed in parenthesis to avoid unpleasant surprises. This should be queued for kernels back to 2.6.31 Signed-off-by: Dmitry Torokhov Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 07e263063e37..91b1a237c11e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -232,7 +232,7 @@ struct xhci_op_regs { * notification type that matches a bit set in this bit field. */ #define DEV_NOTE_MASK (0xffff) -#define ENABLE_DEV_NOTE(x) (1 << x) +#define ENABLE_DEV_NOTE(x) (1 << (x)) /* Most of the device notification types should only be used for debug. * SW does need to pay attention to function wake notifications. */ @@ -601,11 +601,11 @@ struct xhci_ep_ctx { #define EP_STATE_STOPPED 3 #define EP_STATE_ERROR 4 /* Mult - Max number of burtst within an interval, in EP companion desc. */ -#define EP_MULT(p) ((p & 0x3) << 8) +#define EP_MULT(p) (((p) & 0x3) << 8) /* bits 10:14 are Max Primary Streams */ /* bit 15 is Linear Stream Array */ /* Interval - period between requests to an endpoint - 125u increments. */ -#define EP_INTERVAL(p) ((p & 0xff) << 16) +#define EP_INTERVAL(p) (((p) & 0xff) << 16) #define EP_INTERVAL_TO_UFRAMES(p) (1 << (((p) >> 16) & 0xff)) #define EP_MAXPSTREAMS_MASK (0x1f << 10) #define EP_MAXPSTREAMS(p) (((p) << 10) & EP_MAXPSTREAMS_MASK) From 22e0487047567252d5677ff35766cd884375efc2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Mar 2011 22:39:49 +0300 Subject: [PATCH 103/200] USB: xhci: unsigned char never equals -1 There were some places that compared port_speed == -1 where port_speed is a u8. This doesn't work unless we cast the -1 to u8. Some places did it correctly. Instead of using -1 directly, I've created a DUPLICATE_ENTRY define which does the cast and is more descriptive as well. Signed-off-by: Dan Carpenter Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-mem.c | 8 ++++---- drivers/usb/host/xhci-ring.c | 4 ++-- drivers/usb/host/xhci.h | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index a003e79aacdc..ab7fc2b71a8c 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -846,7 +846,7 @@ static u32 xhci_find_real_port_number(struct xhci_hcd *xhci, * Skip ports that don't have known speeds, or have duplicate * Extended Capabilities port speed entries. */ - if (port_speed == 0 || port_speed == -1) + if (port_speed == 0 || port_speed == DUPLICATE_ENTRY) continue; /* @@ -1727,12 +1727,12 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, * found a similar duplicate. */ if (xhci->port_array[i] != major_revision && - xhci->port_array[i] != (u8) -1) { + xhci->port_array[i] != DUPLICATE_ENTRY) { if (xhci->port_array[i] == 0x03) xhci->num_usb3_ports--; else xhci->num_usb2_ports--; - xhci->port_array[i] = (u8) -1; + xhci->port_array[i] = DUPLICATE_ENTRY; } /* FIXME: Should we disable the port? */ continue; @@ -1831,7 +1831,7 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) for (i = 0; i < num_ports; i++) { if (xhci->port_array[i] == 0x03 || xhci->port_array[i] == 0 || - xhci->port_array[i] == -1) + xhci->port_array[i] == DUPLICATE_ENTRY) continue; xhci->usb2_ports[port_index] = diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index cfc1ad92473f..c6d1462aa1c3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1209,7 +1209,7 @@ static unsigned int find_faked_portnum_from_hw_portnum(struct usb_hcd *hcd, * Skip ports that don't have known speeds, or have duplicate * Extended Capabilities port speed entries. */ - if (port_speed == 0 || port_speed == -1) + if (port_speed == 0 || port_speed == DUPLICATE_ENTRY) continue; /* @@ -1260,7 +1260,7 @@ static void handle_port_status(struct xhci_hcd *xhci, port_id); goto cleanup; } - if (major_revision == (u8) -1) { + if (major_revision == DUPLICATE_ENTRY) { xhci_warn(xhci, "Event for port %u duplicated in" "Extended Capabilities, ignoring.\n", port_id); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 91b1a237c11e..bdb78f51735e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -348,6 +348,9 @@ struct xhci_op_regs { /* Initiate a warm port reset - complete when PORT_WRC is '1' */ #define PORT_WR (1 << 31) +/* We mark duplicate entries with -1 */ +#define DUPLICATE_ENTRY ((u8)(-1)) + /* Port Power Management Status and Control - port_power_base bitmasks */ /* Inactivity timer value for transitions into U1, in microseconds. * Timeout can be up to 127us. 0xFF means an infinite timeout. From 575688e1e5f462c44ddd608ce3ec9f38b64c3c0d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 20 Mar 2011 02:15:16 -0700 Subject: [PATCH 104/200] USB: xhci - remove excessive 'inline' markings Remove 'inline' markings from file-local functions and let compiler do its job and inline what makes sense for given architecture. Signed-off-by: Dmitry Torokhov Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-mem.c | 8 ++++---- drivers/usb/host/xhci-ring.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index ab7fc2b71a8c..7f56c9b80692 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -982,7 +982,7 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud * The NAK interval is one NAK per 1 to 255 microframes, or no NAKs if interval * is set to 0. */ -static inline unsigned int xhci_get_endpoint_interval(struct usb_device *udev, +static unsigned int xhci_get_endpoint_interval(struct usb_device *udev, struct usb_host_endpoint *ep) { unsigned int interval = 0; @@ -1041,7 +1041,7 @@ static inline unsigned int xhci_get_endpoint_interval(struct usb_device *udev, * transaction opportunities per microframe", but that goes in the Max Burst * endpoint context field. */ -static inline u32 xhci_get_endpoint_mult(struct usb_device *udev, +static u32 xhci_get_endpoint_mult(struct usb_device *udev, struct usb_host_endpoint *ep) { if (udev->speed != USB_SPEED_SUPER || @@ -1050,7 +1050,7 @@ static inline u32 xhci_get_endpoint_mult(struct usb_device *udev, return ep->ss_ep_comp.bmAttributes; } -static inline u32 xhci_get_endpoint_type(struct usb_device *udev, +static u32 xhci_get_endpoint_type(struct usb_device *udev, struct usb_host_endpoint *ep) { int in; @@ -1084,7 +1084,7 @@ static inline u32 xhci_get_endpoint_type(struct usb_device *udev, * Basically, this is the maxpacket size, multiplied by the burst size * and mult size. */ -static inline u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci, +static u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci, struct usb_device *udev, struct usb_host_endpoint *ep) { diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index c6d1462aa1c3..0161eb053225 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -93,7 +93,7 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, /* Does this link TRB point to the first segment in a ring, * or was the previous TRB the last TRB on the last segment in the ERST? */ -static inline bool last_trb_on_last_seg(struct xhci_hcd *xhci, struct xhci_ring *ring, +static bool last_trb_on_last_seg(struct xhci_hcd *xhci, struct xhci_ring *ring, struct xhci_segment *seg, union xhci_trb *trb) { if (ring == xhci->event_ring) @@ -107,7 +107,7 @@ static inline bool last_trb_on_last_seg(struct xhci_hcd *xhci, struct xhci_ring * segment? I.e. would the updated event TRB pointer step off the end of the * event seg? */ -static inline int last_trb(struct xhci_hcd *xhci, struct xhci_ring *ring, +static int last_trb(struct xhci_hcd *xhci, struct xhci_ring *ring, struct xhci_segment *seg, union xhci_trb *trb) { if (ring == xhci->event_ring) @@ -116,7 +116,7 @@ static inline int last_trb(struct xhci_hcd *xhci, struct xhci_ring *ring, return (trb->link.control & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK); } -static inline int enqueue_is_link_trb(struct xhci_ring *ring) +static int enqueue_is_link_trb(struct xhci_ring *ring) { struct xhci_link_trb *link = &ring->enqueue->link; return ((link->control & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK)); @@ -592,7 +592,7 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci, ep->ep_state |= SET_DEQ_PENDING; } -static inline void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci, +static void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci, struct xhci_virt_ep *ep) { ep->ep_state &= ~EP_HALT_PENDING; From 926008c9386dde09b015753b6681c502177baa30 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Mar 2011 20:47:05 -0700 Subject: [PATCH 105/200] USB: xhci: simplify logic of skipping missed isoc TDs The logic of the handling Missed Service Error Events was pretty confusing as we were checking the same condition several times. In addition, it caused compiler warning since the compiler could not figure out that event_trb is actually unused in case we are skipping current TD. Fix that by rearranging "skip" condition checks, and factor out skip_isoc_td() so that it is called explicitly. Signed-off-by: Dmitry Torokhov Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 172 +++++++++++++++++++---------------- 1 file changed, 94 insertions(+), 78 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 0161eb053225..b69a0a136e66 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1675,71 +1675,52 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, struct urb_priv *urb_priv; int idx; int len = 0; - int skip_td = 0; union xhci_trb *cur_trb; struct xhci_segment *cur_seg; + struct usb_iso_packet_descriptor *frame; u32 trb_comp_code; + bool skip_td = false; ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer); trb_comp_code = GET_COMP_CODE(event->transfer_len); urb_priv = td->urb->hcpriv; idx = urb_priv->td_cnt; + frame = &td->urb->iso_frame_desc[idx]; - if (ep->skip) { - /* The transfer is partly done */ - *status = -EXDEV; - td->urb->iso_frame_desc[idx].status = -EXDEV; - } else { - /* handle completion code */ - switch (trb_comp_code) { - case COMP_SUCCESS: - td->urb->iso_frame_desc[idx].status = 0; - xhci_dbg(xhci, "Successful isoc transfer!\n"); - break; - case COMP_SHORT_TX: - if (td->urb->transfer_flags & URB_SHORT_NOT_OK) - td->urb->iso_frame_desc[idx].status = - -EREMOTEIO; - else - td->urb->iso_frame_desc[idx].status = 0; - break; - case COMP_BW_OVER: - td->urb->iso_frame_desc[idx].status = -ECOMM; - skip_td = 1; - break; - case COMP_BUFF_OVER: - case COMP_BABBLE: - td->urb->iso_frame_desc[idx].status = -EOVERFLOW; - skip_td = 1; - break; - case COMP_STALL: - td->urb->iso_frame_desc[idx].status = -EPROTO; - skip_td = 1; - break; - case COMP_STOP: - case COMP_STOP_INVAL: - break; - default: - td->urb->iso_frame_desc[idx].status = -1; - break; - } + /* handle completion code */ + switch (trb_comp_code) { + case COMP_SUCCESS: + frame->status = 0; + xhci_dbg(xhci, "Successful isoc transfer!\n"); + break; + case COMP_SHORT_TX: + frame->status = td->urb->transfer_flags & URB_SHORT_NOT_OK ? + -EREMOTEIO : 0; + break; + case COMP_BW_OVER: + frame->status = -ECOMM; + skip_td = true; + break; + case COMP_BUFF_OVER: + case COMP_BABBLE: + frame->status = -EOVERFLOW; + skip_td = true; + break; + case COMP_STALL: + frame->status = -EPROTO; + skip_td = true; + break; + case COMP_STOP: + case COMP_STOP_INVAL: + break; + default: + frame->status = -1; + break; } - /* calc actual length */ - if (ep->skip) { - td->urb->iso_frame_desc[idx].actual_length = 0; - /* Update ring dequeue pointer */ - while (ep_ring->dequeue != td->last_trb) - inc_deq(xhci, ep_ring, false); - inc_deq(xhci, ep_ring, false); - return finish_td(xhci, td, event_trb, event, ep, status, true); - } - - if (trb_comp_code == COMP_SUCCESS || skip_td == 1) { - td->urb->iso_frame_desc[idx].actual_length = - td->urb->iso_frame_desc[idx].length; - td->urb->actual_length += - td->urb->iso_frame_desc[idx].length; + if (trb_comp_code == COMP_SUCCESS || skip_td) { + frame->actual_length = frame->length; + td->urb->actual_length += frame->length; } else { for (cur_trb = ep_ring->dequeue, cur_seg = ep_ring->deq_seg; cur_trb != event_trb; @@ -1755,7 +1736,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, TRB_LEN(event->transfer_len); if (trb_comp_code != COMP_STOP_INVAL) { - td->urb->iso_frame_desc[idx].actual_length = len; + frame->actual_length = len; td->urb->actual_length += len; } } @@ -1766,6 +1747,35 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, return finish_td(xhci, td, event_trb, event, ep, status, false); } +static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, + struct xhci_transfer_event *event, + struct xhci_virt_ep *ep, int *status) +{ + struct xhci_ring *ep_ring; + struct urb_priv *urb_priv; + struct usb_iso_packet_descriptor *frame; + int idx; + + ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer); + urb_priv = td->urb->hcpriv; + idx = urb_priv->td_cnt; + frame = &td->urb->iso_frame_desc[idx]; + + /* The transfer is partly done */ + *status = -EXDEV; + frame->status = -EXDEV; + + /* calc actual length */ + frame->actual_length = 0; + + /* Update ring dequeue pointer */ + while (ep_ring->dequeue != td->last_trb) + inc_deq(xhci, ep_ring, false); + inc_deq(xhci, ep_ring, false); + + return finish_td(xhci, td, NULL, event, ep, status, true); +} + /* * Process bulk and interrupt tds, update urb status and actual_length. */ @@ -2024,36 +2034,42 @@ static int handle_tx_event(struct xhci_hcd *xhci, } td = list_entry(ep_ring->td_list.next, struct xhci_td, td_list); + /* Is this a TRB in the currently executing TD? */ event_seg = trb_in_td(ep_ring->deq_seg, ep_ring->dequeue, td->last_trb, event_dma); - if (event_seg && ep->skip) { + if (!event_seg) { + if (!ep->skip || + !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + /* HC is busted, give up! */ + xhci_err(xhci, + "ERROR Transfer event TRB DMA ptr not " + "part of current TD\n"); + return -ESHUTDOWN; + } + + ret = skip_isoc_td(xhci, td, event, ep, &status); + goto cleanup; + } + + if (ep->skip) { xhci_dbg(xhci, "Found td. Clear skip flag.\n"); ep->skip = false; } - if (!event_seg && - (!ep->skip || !usb_endpoint_xfer_isoc(&td->urb->ep->desc))) { - /* HC is busted, give up! */ - xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " - "part of current TD\n"); - return -ESHUTDOWN; - } - if (event_seg) { - event_trb = &event_seg->trbs[(event_dma - - event_seg->dma) / sizeof(*event_trb)]; - /* - * No-op TRB should not trigger interrupts. - * If event_trb is a no-op TRB, it means the - * corresponding TD has been cancelled. Just ignore - * the TD. - */ - if ((event_trb->generic.field[3] & TRB_TYPE_BITMASK) - == TRB_TYPE(TRB_TR_NOOP)) { - xhci_dbg(xhci, "event_trb is a no-op TRB. " - "Skip it\n"); - goto cleanup; - } + event_trb = &event_seg->trbs[(event_dma - event_seg->dma) / + sizeof(*event_trb)]; + /* + * No-op TRB should not trigger interrupts. + * If event_trb is a no-op TRB, it means the + * corresponding TD has been cancelled. Just ignore + * the TD. + */ + if ((event_trb->generic.field[3] & TRB_TYPE_BITMASK) + == TRB_TYPE(TRB_TR_NOOP)) { + xhci_dbg(xhci, + "event_trb is a no-op TRB. Skip it\n"); + goto cleanup; } /* Now update the urb's actual_length and give back to From dfa49c4ad120a784ef1ff0717168aa79f55a483a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Mar 2011 22:41:23 -0700 Subject: [PATCH 106/200] USB: xhci - fix math in xhci_get_endpoint_interval() When parsing exponent-expressed intervals we subtract 1 from the value and then expect it to match with original + 1, which is highly unlikely, and we end with frequent spew: usb 3-4: ep 0x83 - rounding interval to 512 microframes Also, parsing interval for fullspeed isochronous endpoints was incorrect - according to USB spec they use exponent-based intervals (but xHCI spec claims frame-based intervals). I trust USB spec more, especially since USB core agrees with it. This should be queued for stable kernels back to 2.6.31. Reviewed-by: Micah Elizabeth Scott Signed-off-by: Dmitry Torokhov Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci-mem.c | 90 +++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 7f56c9b80692..627f3438028c 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -974,6 +974,47 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud return 0; } +/* + * Convert interval expressed as 2^(bInterval - 1) == interval into + * straight exponent value 2^n == interval. + * + */ +static unsigned int xhci_parse_exponent_interval(struct usb_device *udev, + struct usb_host_endpoint *ep) +{ + unsigned int interval; + + interval = clamp_val(ep->desc.bInterval, 1, 16) - 1; + if (interval != ep->desc.bInterval - 1) + dev_warn(&udev->dev, + "ep %#x - rounding interval to %d microframes\n", + ep->desc.bEndpointAddress, + 1 << interval); + + return interval; +} + +/* + * Convert bInterval expressed in frames (in 1-255 range) to exponent of + * microframes, rounded down to nearest power of 2. + */ +static unsigned int xhci_parse_frame_interval(struct usb_device *udev, + struct usb_host_endpoint *ep) +{ + unsigned int interval; + + interval = fls(8 * ep->desc.bInterval) - 1; + interval = clamp_val(interval, 3, 10); + if ((1 << interval) != 8 * ep->desc.bInterval) + dev_warn(&udev->dev, + "ep %#x - rounding interval to %d microframes, ep desc says %d microframes\n", + ep->desc.bEndpointAddress, + 1 << interval, + 8 * ep->desc.bInterval); + + return interval; +} + /* Return the polling or NAK interval. * * The polling interval is expressed in "microframes". If xHCI's Interval field @@ -991,45 +1032,38 @@ static unsigned int xhci_get_endpoint_interval(struct usb_device *udev, case USB_SPEED_HIGH: /* Max NAK rate */ if (usb_endpoint_xfer_control(&ep->desc) || - usb_endpoint_xfer_bulk(&ep->desc)) + usb_endpoint_xfer_bulk(&ep->desc)) { interval = ep->desc.bInterval; + break; + } /* Fall through - SS and HS isoc/int have same decoding */ + case USB_SPEED_SUPER: if (usb_endpoint_xfer_int(&ep->desc) || - usb_endpoint_xfer_isoc(&ep->desc)) { - if (ep->desc.bInterval == 0) - interval = 0; - else - interval = ep->desc.bInterval - 1; - if (interval > 15) - interval = 15; - if (interval != ep->desc.bInterval + 1) - dev_warn(&udev->dev, "ep %#x - rounding interval to %d microframes\n", - ep->desc.bEndpointAddress, 1 << interval); + usb_endpoint_xfer_isoc(&ep->desc)) { + interval = xhci_parse_exponent_interval(udev, ep); } break; - /* Convert bInterval (in 1-255 frames) to microframes and round down to - * nearest power of 2. - */ + case USB_SPEED_FULL: + if (usb_endpoint_xfer_int(&ep->desc)) { + interval = xhci_parse_exponent_interval(udev, ep); + break; + } + /* + * Fall through for isochronous endpoint interval decoding + * since it uses the same rules as low speed interrupt + * endpoints. + */ + case USB_SPEED_LOW: if (usb_endpoint_xfer_int(&ep->desc) || - usb_endpoint_xfer_isoc(&ep->desc)) { - interval = fls(8*ep->desc.bInterval) - 1; - if (interval > 10) - interval = 10; - if (interval < 3) - interval = 3; - if ((1 << interval) != 8*ep->desc.bInterval) - dev_warn(&udev->dev, - "ep %#x - rounding interval" - " to %d microframes, " - "ep desc says %d microframes\n", - ep->desc.bEndpointAddress, - 1 << interval, - 8*ep->desc.bInterval); + usb_endpoint_xfer_isoc(&ep->desc)) { + + interval = xhci_parse_frame_interval(udev, ep); } break; + default: BUG(); } From 386139d7c8f22d4983ca89de35d339cc41bb0996 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Thu, 24 Mar 2011 08:02:58 -0700 Subject: [PATCH 107/200] xhci: Fix NULL pointer deref in handle_port_status() When we get a port status change event, we need to figure out what type of port it came from: a USB 3.0 port, or a USB 2.0/1.1 port. We can't know which usb_hcd to use until that point, so hcd will be NULL for part of the function. Unfortunately, if any of the sanity checks fail, we'll jump to the cleanup label before hcd is set to a valid pointer, and then we'll attempt to tell the USB core to kick the hcd, which is NULL. Skip kicking the roothub if the sanity checks fail. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b69a0a136e66..b0b4cc3b8584 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1235,6 +1235,7 @@ static void handle_port_status(struct xhci_hcd *xhci, u8 major_revision; struct xhci_bus_state *bus_state; u32 __iomem **port_array; + bool bogus_port_status = false; /* Port status change events always have a successful completion code */ if (GET_COMP_CODE(event->generic.field[2]) != COMP_SUCCESS) { @@ -1247,6 +1248,7 @@ static void handle_port_status(struct xhci_hcd *xhci, max_ports = HCS_MAX_PORTS(xhci->hcs_params1); if ((port_id <= 0) || (port_id > max_ports)) { xhci_warn(xhci, "Invalid port id %d\n", port_id); + bogus_port_status = true; goto cleanup; } @@ -1258,12 +1260,14 @@ static void handle_port_status(struct xhci_hcd *xhci, xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", port_id); + bogus_port_status = true; goto cleanup; } if (major_revision == DUPLICATE_ENTRY) { xhci_warn(xhci, "Event for port %u duplicated in" "Extended Capabilities, ignoring.\n", port_id); + bogus_port_status = true; goto cleanup; } @@ -1335,6 +1339,13 @@ cleanup: /* Update event ring dequeue pointer before dropping the lock */ inc_deq(xhci, xhci->event_ring, true); + /* Don't make the USB core poll the roothub if we got a bad port status + * change event. Besides, at that point we can't tell which roothub + * (USB 2.0 or USB 3.0) to kick. + */ + if (bogus_port_status) + return; + spin_unlock(&xhci->lock); /* Pass this up to the core */ usb_hcd_poll_rh_status(hcd); From 943aee0c685d0563228d5a2ad9c8394ad0300fb5 Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Thu, 7 Jan 2010 06:57:30 +0000 Subject: [PATCH 108/200] Blackfin: SMP: make all barriers handle cache issues When suspending/resuming, the common task freezing code will run in parallel and freeze processes on each core. This is because the code uses the non-smp version of memory barriers (as well it should). The Blackfin smp barrier logic at the moment contains the cache sync logic, but the non-smp barriers do not. This is incorrect as Rafel summarized: > ... > The existing memory barriers are SMP barriers too, but they are more > than _just_ SMP barriers. At least that's how it is _supposed_ to be > (eg. rmb() is supposed to be stronger than smp_rmb()). > ... > However, looking at the blackfin's definitions of SMP barriers I see > that it uses extra stuff that should _also_ be used in the definitions > of the mandatory barriers. > ... URL: http://lkml.org/lkml/2011/4/13/11 LKML-Reference: Signed-off-by: Graf Yang Signed-off-by: Mike Frysinger --- arch/blackfin/include/asm/system.h | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/blackfin/include/asm/system.h b/arch/blackfin/include/asm/system.h index 19e2c7c3e63a..44bd0cced725 100644 --- a/arch/blackfin/include/asm/system.h +++ b/arch/blackfin/include/asm/system.h @@ -19,11 +19,11 @@ * Force strict CPU ordering. */ #define nop() __asm__ __volatile__ ("nop;\n\t" : : ) -#define mb() __asm__ __volatile__ ("" : : : "memory") -#define rmb() __asm__ __volatile__ ("" : : : "memory") -#define wmb() __asm__ __volatile__ ("" : : : "memory") -#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) -#define read_barrier_depends() do { } while(0) +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define set_mb(var, value) do { var = value; mb(); } while (0) +#define smp_read_barrier_depends() read_barrier_depends() #ifdef CONFIG_SMP asmlinkage unsigned long __raw_xchg_1_asm(volatile void *ptr, unsigned long value); @@ -37,16 +37,16 @@ asmlinkage unsigned long __raw_cmpxchg_4_asm(volatile void *ptr, unsigned long new, unsigned long old); #ifdef __ARCH_SYNC_CORE_DCACHE -# define smp_mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) -# define smp_rmb() do { barrier(); smp_check_barrier(); } while (0) -# define smp_wmb() do { barrier(); smp_mark_barrier(); } while (0) -#define smp_read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) - +/* Force Core data cache coherence */ +# define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) +# define rmb() do { barrier(); smp_check_barrier(); } while (0) +# define wmb() do { barrier(); smp_mark_barrier(); } while (0) +# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) #else -# define smp_mb() barrier() -# define smp_rmb() barrier() -# define smp_wmb() barrier() -#define smp_read_barrier_depends() barrier() +# define mb() barrier() +# define rmb() barrier() +# define wmb() barrier() +# define read_barrier_depends() do { } while (0) #endif static inline unsigned long __xchg(unsigned long x, volatile void *ptr, @@ -99,10 +99,10 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, #else /* !CONFIG_SMP */ -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) +#define mb() barrier() +#define rmb() barrier() +#define wmb() barrier() +#define read_barrier_depends() do { } while (0) struct __xchg_dummy { unsigned long a[100]; From ce24ee468aabb7c499b910aa3c4ab3cb338326ed Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 4 Apr 2011 15:20:50 +0000 Subject: [PATCH 109/200] Blackfin: gptimers: fix thinko when disabling timers We only want to clear the run bit for this one timer, not all status bits. So don't read the whole reg and then write all the bits back out. Reported-by: Isabelle Leonardi Signed-off-by: Mike Frysinger --- arch/blackfin/kernel/gptimers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/blackfin/kernel/gptimers.c b/arch/blackfin/kernel/gptimers.c index cdbe075de1dc..8b81dc04488a 100644 --- a/arch/blackfin/kernel/gptimers.c +++ b/arch/blackfin/kernel/gptimers.c @@ -268,7 +268,7 @@ void disable_gptimers(uint16_t mask) _disable_gptimers(mask); for (i = 0; i < MAX_BLACKFIN_GPTIMERS; ++i) if (mask & (1 << i)) - group_regs[BFIN_TIMER_OCTET(i)]->status |= trun_mask[i]; + group_regs[BFIN_TIMER_OCTET(i)]->status = trun_mask[i]; SSYNC(); } EXPORT_SYMBOL(disable_gptimers); From 0bf02ce605b8780223b10739ab7c533de9eb10cc Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 4 Apr 2011 15:26:11 +0000 Subject: [PATCH 110/200] Blackfin: time-ts: ack gptimer sooner to avoid missing short ints If the period of a gptimer is fairly low, we might miss an interrupt by acking it too late (we end up acking the new int as well). Reported-by: Isabelle Leonardi Signed-off-by: Mike Frysinger --- arch/blackfin/kernel/time-ts.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index 8c9a43daf80f..cdb4beb6bc8f 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -206,8 +206,14 @@ irqreturn_t bfin_gptmr0_interrupt(int irq, void *dev_id) { struct clock_event_device *evt = dev_id; smp_mb(); - evt->event_handler(evt); + /* + * We want to ACK before we handle so that we can handle smaller timer + * intervals. This way if the timer expires again while we're handling + * things, we're more likely to see that 2nd int rather than swallowing + * it by ACKing the int at the end of this handler. + */ bfin_gptmr0_ack(); + evt->event_handler(evt); return IRQ_HANDLED; } From 8d50de9ee77b38a239dc5b1d6a63ad92a78f119d Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Tue, 12 Apr 2011 08:16:04 +0000 Subject: [PATCH 111/200] Blackfin: SMP: fix cache flush loop The recent commit (10774912647781) wasn't entirely correct. While it fixed some issues, it introduced others. So pull in the fixes from the public cache flush functions, and document why we need to call things directly ourselves. Signed-off-by: Sonic Zhang Signed-off-by: Mike Frysinger --- arch/blackfin/mach-common/smp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 6e17a265c4d3..8bce5ed031e4 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -109,10 +109,23 @@ static void ipi_flush_icache(void *info) struct blackfin_flush_data *fdata = info; /* Invalidate the memory holding the bounds of the flushed region. */ - invalidate_dcache_range((unsigned long)fdata, - (unsigned long)fdata + sizeof(*fdata)); + blackfin_dcache_invalidate_range((unsigned long)fdata, + (unsigned long)fdata + sizeof(*fdata)); - flush_icache_range(fdata->start, fdata->end); + /* Make sure all write buffers in the data side of the core + * are flushed before trying to invalidate the icache. This + * needs to be after the data flush and before the icache + * flush so that the SSYNC does the right thing in preventing + * the instruction prefetcher from hitting things in cached + * memory at the wrong time -- it runs much further ahead than + * the pipeline. + */ + SSYNC(); + + /* ipi_flaush_icache is invoked by generic flush_icache_range, + * so call blackfin arch icache flush directly here. + */ + blackfin_icache_flush_range(fdata->start, fdata->end); } static void ipi_call_function(unsigned int cpu, struct ipi_message *msg) From 2dea75d96ade3c7cd2bfe73f99c7b3291dc3d03a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 12 Apr 2011 23:06:28 -0700 Subject: [PATCH 112/200] USB: xhci - also free streams when resetting devices Currently, when resetting a device, xHCI driver disables all but one endpoints and frees their rings, but leaves alone any streams that might have been allocated. Later, when users try to free allocated streams, we oops in xhci_setup_no_streams_ep_input_ctx() because ep->ring is NULL. Let's free not only rings but also stream data as well, so that calling free_streams() on a device that was reset will be safe. This should be queued for stable trees back to 2.6.35. Reviewed-by: Micah Elizabeth Scott Signed-off-by: Dmitry Torokhov Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 196e0181b2ed..48706c0d6577 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2386,10 +2386,18 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) /* Everything but endpoint 0 is disabled, so free or cache the rings. */ last_freed_endpoint = 1; for (i = 1; i < 31; ++i) { - if (!virt_dev->eps[i].ring) - continue; - xhci_free_or_cache_endpoint_ring(xhci, virt_dev, i); - last_freed_endpoint = i; + struct xhci_virt_ep *ep = &virt_dev->eps[i]; + + if (ep->ep_state & EP_HAS_STREAMS) { + xhci_free_stream_info(xhci, ep->stream_info); + ep->stream_info = NULL; + ep->ep_state &= ~EP_HAS_STREAMS; + } + + if (ep->ring) { + xhci_free_or_cache_endpoint_ring(xhci, virt_dev, i); + last_freed_endpoint = i; + } } xhci_dbg(xhci, "Output context after successful reset device cmd:\n"); xhci_dbg_ctx(xhci, virt_dev->out_ctx, last_freed_endpoint); From b214f191d95ba4b5a35aebd69cd129cf7e3b1884 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Sep 2010 00:57:32 -0400 Subject: [PATCH 113/200] USB: Fix unplug of device with active streams If I unplug a device while the UAS driver is loaded, I get an oops in usb_free_streams(). This is because usb_unbind_interface() calls usb_disable_interface() which calls usb_disable_endpoint() which sets ep_out and ep_in to NULL. Then the UAS driver calls usb_pipe_endpoint() which returns a NULL pointer and passes an array of NULL pointers to usb_free_streams(). I think the correct fix for this is to check for the NULL pointer in usb_free_streams() rather than making the driver check for this situation. My original patch for this checked for dev->state == USB_STATE_NOTATTACHED, but the call to usb_disable_interface() is conditional, so not all drivers would want this check. Note from Sarah Sharp: This patch does avoid a potential dereference, but the real fix (which will be implemented later) is to set the .soft_unbind flag in the usb_driver structure for the UAS driver, and all drivers that allocate streams. The driver should free any streams when it is unbound from the interface. This avoids leaking stream rings in the xHCI driver when usb_disable_interface() is called. This should be queued for stable trees back to 2.6.35. Signed-off-by: Matthew Wilcox Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/core/hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 8eed05d23838..77a7faec8d78 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1908,7 +1908,7 @@ void usb_free_streams(struct usb_interface *interface, /* Streams only apply to bulk endpoints. */ for (i = 0; i < num_eps; i++) - if (!usb_endpoint_xfer_bulk(&eps[i]->desc)) + if (!eps[i] || !usb_endpoint_xfer_bulk(&eps[i]->desc)) return; hcd->driver->free_streams(hcd, dev, eps, num_eps, mem_flags); From a8f08d86dbf1b7bb5869cf1807d2fd40ec9d6d0a Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Thu, 31 Mar 2011 14:56:50 +0800 Subject: [PATCH 114/200] usbcore: Bug fix: system can't suspend with USB3.0 device connected to USB3.0 hub This patch clear PORT_POWER when suspend a USB3.0 device behind a USB3.0 external hub, so the system can suspend and resume. Note USB3.0 device may not work after system resume and this is a temporary workaround. The correct fix will be in future patches. Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp --- drivers/usb/core/hub.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 8fb754916c67..93720bdc9efd 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2285,7 +2285,17 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) } /* see 7.1.7.6 */ - status = set_port_feature(hub->hdev, port1, USB_PORT_FEAT_SUSPEND); + /* Clear PORT_POWER if it's a USB3.0 device connected to USB 3.0 + * external hub. + * FIXME: this is a temporary workaround to make the system able + * to suspend/resume. + */ + if ((hub->hdev->parent != NULL) && hub_is_superspeed(hub->hdev)) + status = clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_POWER); + else + status = set_port_feature(hub->hdev, port1, + USB_PORT_FEAT_SUSPEND); if (status) { dev_dbg(hub->intfdev, "can't suspend port %d, status %d\n", port1, status); From fedd383e33f9ba9b91626f72c593ea327403bf59 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 12 Apr 2011 17:43:19 -0700 Subject: [PATCH 115/200] xhci: Tell USB core both roothubs lost power. On a resume, when the power is lost during hibernate, the USB core will call hub_reset_resume for the xHCI USB 2.0 roothub, but not for the USB 3.0 roothub: [ 164.748310] usb usb1: root hub lost power or was reset [ 164.748353] usb usb2: root hub lost power or was reset [ 164.748487] usb usb3: root hub lost power or was reset [ 164.748488] xhci_hcd 0000:01:00.0: Stop HCD ... [ 164.870039] hub 4-0:1.0: hub_resume ... [ 164.870054] hub 3-0:1.0: hub_reset_resume This causes issues later, because the USB core assumes the USB 3.0 hub attached to the USB 3.0 roothub is still active. It attempts to queue a control URB for the external hub, which fails because all the device slot contexts were released when the USB 3.0 roothub lost power: [ 164.980044] hub 4-1:1.0: hub_resume [ 164.980047] xhci_hcd 0000:01:00.0: Get port status returned 0x10101 [ 164.980049] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980053] hub 3-0:1.0: port 1: status 0101 change 0001 [ 164.980056] hub 4-1:1.0: hub_port_status failed (err = -22) [ 164.980060] xhci_hcd 0000:01:00.0: `MEM_WRITE_DWORD(3'b000, 32'hffffc90008948440, 32'h202e1, 4'hf); [ 164.980062] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980066] xhci_hcd 0000:01:00.0: clear port connect change, actual port 0 status = 0x2e1 [ 164.980069] hub 4-1:1.0: hub_port_status failed (err = -22) [ 164.980072] xhci_hcd 0000:01:00.0: get port status, actual port 1 status = 0x2a0 [ 164.980074] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980077] xhci_hcd 0000:01:00.0: Get port status returned 0x100 [ 164.980079] hub 4-1:1.0: hub_port_status failed (err = -22) [ 164.980082] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980085] hub 4-1:1.0: hub_port_status failed (err = -22) [ 164.980088] hub 4-1:1.0: port 4: status 0000 change 0000 [ 164.980091] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980094] hub 4-1:1.0: activate --> -22 [ 164.980113] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980117] hub 4-1:1.0: hub_port_status failed (err = -22) [ 164.980119] xHCI xhci_urb_enqueue called with unaddressed device [ 164.980123] hub 4-1:1.0: can't resume port 4, status -22 [ 164.980126] hub 4-1:1.0: port 4 status ffff.ffff after resume, -22 [ 164.980129] usb 4-1.4: can't resume, status -22 [ 164.980131] hub 4-1:1.0: logical disconnect on port 4 This causes issues when a USB 3.0 hard drive is attached to the external USB 3.0 hub when the system is hibernated: [ 6249.849653] sd 8:0:0:0: [sdb] Unhandled error code [ 6249.849659] sd 8:0:0:0: [sdb] Result: hostbyte=DID_ERROR driverbyte=DRIVER_OK [ 6249.849663] sd 8:0:0:0: [sdb] CDB: Read(10): 28 00 00 00 2a 08 00 00 02 00 [ 6249.849671] end_request: I/O error, dev sdb, sector 10760 Make sure to inform the USB core that *both* xHCI roothubs lost power. Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 48706c0d6577..c41358e4b8cc 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -771,7 +771,9 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* If restore operation fails, re-initialize the HC during resume */ if ((temp & STS_SRE) || hibernated) { - usb_root_hub_lost_power(hcd->self.root_hub); + /* Let the USB core know _both_ roothubs lost power. */ + usb_root_hub_lost_power(xhci->main_hcd->self.root_hub); + usb_root_hub_lost_power(xhci->shared_hcd->self.root_hub); xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); From c41136b05d3fb213a192f76a5688ff83687c1136 Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Tue, 22 Mar 2011 17:08:14 +0800 Subject: [PATCH 116/200] xHCI: Implement AMD PLL quirk This patch disable the optional PM feature inside the Hudson3 platform under the following conditions: 1. If an isochronous device is connected to xHCI port and is active; 2. Optional PM feature that powers down the internal Bus PLL when the link is in low power state is enabled. The PM feature needs to be disabled to eliminate PLL startup delays when the link comes out of low power state. The performance of DMA data transfer could be impacted if system delay were encountered and in addition to the PLL start up delays. Disabling the PM would leave room for unpredictable system delays in order to guarantee uninterrupted data transfer to isochronous audio or video stream devices that require time sensitive information. If data in an audio/video stream was interrupted then erratic audio or video performance may be encountered. AMD PLL quirk is already implemented in OHCI/EHCI driver. After moving the quirk code to pci-quirks.c and export them, xHCI driver can call it directly without having the quirk implementation in itself. Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-pci.c | 4 ++++ drivers/usb/host/xhci-ring.c | 24 +++++++++++++++++++++++- drivers/usb/host/xhci.c | 3 +++ drivers/usb/host/xhci.h | 2 ++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index ceea9f33491c..a10494c2f3c7 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -114,6 +114,10 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; + /* AMD PLL quirk */ + if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info()) + xhci->quirks |= XHCI_AMD_PLL_FIX; + /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b0b4cc3b8584..7437386a9a50 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -619,6 +619,13 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, /* Only giveback urb when this is the last td in urb */ if (urb_priv->td_cnt == urb_priv->length) { + if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs--; + if (xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs == 0) { + if (xhci->quirks & XHCI_AMD_PLL_FIX) + usb_amd_quirk_pll_enable(); + } + } usb_hcd_unlink_urb_from_ep(hcd, urb); xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, urb); @@ -1565,8 +1572,17 @@ td_cleanup: urb_priv->td_cnt++; /* Giveback the urb when all the tds are completed */ - if (urb_priv->td_cnt == urb_priv->length) + if (urb_priv->td_cnt == urb_priv->length) { ret = 1; + if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { + xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs--; + if (xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs + == 0) { + if (xhci->quirks & XHCI_AMD_PLL_FIX) + usb_amd_quirk_pll_enable(); + } + } + } } return ret; @@ -3153,6 +3169,12 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, } } + if (xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs == 0) { + if (xhci->quirks & XHCI_AMD_PLL_FIX) + usb_amd_quirk_pll_disable(); + } + xhci_to_hcd(xhci)->self.bandwidth_isoc_reqs++; + giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, start_cycle, start_trb); return 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c41358e4b8cc..81b976e45880 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -550,6 +550,9 @@ void xhci_stop(struct usb_hcd *hcd) del_timer_sync(&xhci->event_ring_timer); #endif + if (xhci->quirks & XHCI_AMD_PLL_FIX) + usb_amd_dev_put(); + xhci_dbg(xhci, "// Disabling event ring interrupts\n"); temp = xhci_readl(xhci, &xhci->op_regs->status); xhci_writel(xhci, temp & ~STS_EINT, &xhci->op_regs->status); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index bdb78f51735e..ba1be6b7cc6d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -30,6 +30,7 @@ /* Code sharing between pci-quirks and xhci hcd */ #include "xhci-ext-caps.h" +#include "pci-quirks.h" /* xHCI PCI Configuration Registers */ #define XHCI_SBRN_OFFSET (0x60) @@ -1279,6 +1280,7 @@ struct xhci_hcd { #define XHCI_LINK_TRB_QUIRK (1 << 0) #define XHCI_RESET_EP_QUIRK (1 << 1) #define XHCI_NEC_HOST (1 << 2) +#define XHCI_AMD_PLL_FIX (1 << 3) /* There are two roothubs to keep track of bus suspend info for */ struct xhci_bus_state bus_state[2]; /* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? */ From dc5b966214cdbc4000f1da03a231952a701ebe6f Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Fri, 25 Mar 2011 17:00:48 +0900 Subject: [PATCH 117/200] ARM: S5P: Remove unused s3c_pm_check_resume_pin The s3c_pm_check_resume_pin() is not being used and can be safely removed to fix the build warning. Signed-off-by: Abhilash Kesavan Signed-off-by: Kukjin Kim --- arch/arm/plat-s5p/pm.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm/plat-s5p/pm.c b/arch/arm/plat-s5p/pm.c index d592b6304b48..d15dc47b0e3d 100644 --- a/arch/arm/plat-s5p/pm.c +++ b/arch/arm/plat-s5p/pm.c @@ -19,17 +19,6 @@ #define PFX "s5p pm: " -/* s3c_pm_check_resume_pin - * - * check to see if the pin is configured correctly for sleep mode, and - * make any necessary adjustments if it is not -*/ - -static void s3c_pm_check_resume_pin(unsigned int pin, unsigned int irqoffs) -{ - /* nothing here yet */ -} - /* s3c_pm_configure_extint * * configure all external interrupt pins From 96cfb97dd4474da6f3c30b2bfbe2286e5554fc97 Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Fri, 25 Mar 2011 17:45:19 +0900 Subject: [PATCH 118/200] ARM: SAMSUNG: Fix build failure in PM CRC check code This patch fixes build error that occurs on enabling the Samsung specific PM CRC check code. Missed removing this reference of s3c_sleep_save_phys during move to generic cpu suspend/resume support. Signed-off-by: Abhilash Kesavan Cc: Russell King Signed-off-by: Kukjin Kim --- arch/arm/plat-samsung/pm-check.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c index e4baf76f374a..6b733fafe7cd 100644 --- a/arch/arm/plat-samsung/pm-check.c +++ b/arch/arm/plat-samsung/pm-check.c @@ -164,7 +164,6 @@ static inline int in_region(void *ptr, int size, void *what, size_t whatsz) */ static u32 *s3c_pm_runcheck(struct resource *res, u32 *val) { - void *save_at = phys_to_virt(s3c_sleep_save_phys); unsigned long addr; unsigned long left; void *stkpage; @@ -192,11 +191,6 @@ static u32 *s3c_pm_runcheck(struct resource *res, u32 *val) goto skip_check; } - if (in_region(ptr, left, save_at, 32*4 )) { - S3C_PMDBG("skipping %08lx, has save block in\n", addr); - goto skip_check; - } - /* calculate and check the checksum */ calc = crc32_le(~0, ptr, left); From baab7307c7af963fbd993149d50dd45232b9d04c Mon Sep 17 00:00:00 2001 From: Maurus Cuelenaere Date: Sat, 2 Apr 2011 10:50:22 +0900 Subject: [PATCH 119/200] ARM: SAMSUNG: Fix warning 's3c_pm_show_resume_irqs' defined but not used s3c_pm_show_resume_irqs() is used by some s3c_pm_arch_show_resume_irqs() implementations, which get included through mach/pm-core.h. Add __maybe_unused to silence warnings when it isn't used (e.g. on S3C64XX platforms). Signed-off-by: Maurus Cuelenaere Signed-off-by: Kukjin Kim --- arch/arm/plat-samsung/pm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c index d5b58d31903c..5c0a440d6e16 100644 --- a/arch/arm/plat-samsung/pm.c +++ b/arch/arm/plat-samsung/pm.c @@ -214,8 +214,9 @@ void s3c_pm_do_restore_core(struct sleep_save *ptr, int count) * * print any IRQs asserted at resume time (ie, we woke from) */ -static void s3c_pm_show_resume_irqs(int start, unsigned long which, - unsigned long mask) +static void __maybe_unused s3c_pm_show_resume_irqs(int start, + unsigned long which, + unsigned long mask) { int i; From b025a3f836d1e8785ae65b59282a4befef9892bb Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 11 Apr 2011 19:06:26 +0100 Subject: [PATCH 120/200] ARM: 6876/1: Kconfig.debug: Remove unused CONFIG_DEBUG_ERRORS This config option isn't actually used anywhere and can be safely removed. The last user was traps.c before commit 082f47a ([ARM] always allow dump_stack() to produce a backtrace, 2007-07-05). Reviewed-by: Jesper Juhl Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 494224a9b459..03d01d783e3b 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -63,17 +63,6 @@ config DEBUG_USER 8 - SIGSEGV faults 16 - SIGBUS faults -config DEBUG_ERRORS - bool "Verbose kernel error messages" - depends on DEBUG_KERNEL - help - This option controls verbose debugging information which can be - printed when the kernel detects an internal error. This debugging - information is useful to kernel hackers when tracking down problems, - but mostly meaningless to other people. It's safe to say Y unless - you are concerned with the code size or don't want to see these - messages. - config DEBUG_STACK_USAGE bool "Enable stack utilization instrumentation" depends on DEBUG_KERNEL From df5419a9a90ac4ea2d853d68cc788e32cfe71278 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 13 Apr 2011 04:57:17 +0100 Subject: [PATCH 121/200] ARM: 6877/1: the ADDR_NO_RANDOMIZE personality flag should be honored with mmap() Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/mmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index afe209e1e1f8..74be05f3e03a 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -82,7 +83,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, mm->cached_hole_size = 0; } /* 8 bits of randomness in 20 address space bits */ - if (current->flags & PF_RANDOMIZE) + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) addr += (get_random_int() % (1 << 8)) << PAGE_SHIFT; full_search: From 5e143436d04465c937c1a242808a99c46393af3e Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 13 Apr 2011 04:59:36 +0100 Subject: [PATCH 122/200] ARM: 6878/1: fix personality flag propagation across an exec Our SET_PERSONALITY() implementation was overwriting all existing personality flags, including ADDR_NO_RANDOMIZE, making them unavailable to processes being exec'd after a call to personality() in user space. This prevents the gdb test suite from running successfully. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/elf.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index d4a0da1e48f4..9b05c6a0dcea 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -40,15 +40,22 @@ EXPORT_SYMBOL(elf_check_arch); void elf_set_personality(const struct elf32_hdr *x) { unsigned int eflags = x->e_flags; - unsigned int personality = PER_LINUX_32BIT; + unsigned int personality = current->personality & ~PER_MASK; + + /* + * We only support Linux ELF executables, so always set the + * personality to LINUX. + */ + personality |= PER_LINUX; /* * APCS-26 is only valid for OABI executables */ - if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN) { - if (eflags & EF_ARM_APCS_26) - personality = PER_LINUX; - } + if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN && + (eflags & EF_ARM_APCS_26)) + personality &= ~ADDR_LIMIT_32BIT; + else + personality |= ADDR_LIMIT_32BIT; set_personality(personality); From 88b9ef452690233d200abf57a3fa2c0f3bd874c5 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 13 Apr 2011 05:01:52 +0100 Subject: [PATCH 123/200] ARM: 6879/1: fix personality test wrt usage of domain handlers There are optional bits that may complement a personality ID. It is therefore wrong to simply test against the absolute current->personality value to determine the effective personality. The PER_LINUX_32BIT is itself just PER_LINUX with one of those optional bits set. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index f0000e188c8c..3b54ad19d489 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -410,8 +410,7 @@ static int bad_syscall(int n, struct pt_regs *regs) struct thread_info *thread = current_thread_info(); siginfo_t info; - if (current->personality != PER_LINUX && - current->personality != PER_LINUX_32BIT && + if ((current->personality & PER_MASK) != PER_LINUX && thread->exec_domain->handler) { thread->exec_domain->handler(n, regs); return regs->ARM_r0; From 753d8534cc190ed144caebc2ea49ab7a43dca662 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Apr 2011 13:37:07 -0700 Subject: [PATCH 124/200] Revert "USB: isp1760-hcd: move imask clear after pending work is done" This reverts commit 5808544690300071f09eef9ab83a0fb1f60cf1cd. To quote Richard: I don't think this should be mainlined. It was a misunderstanding on my part. If you see all the other hdc drivers in the same location, they all do the same thing (i.e. clear the interrupt status first, then do the work) that "glitch" I think I saw was actually two back-to-back interrupts. Sebastian (the original author of isp1760) explained it to me a few days after my submission. sorry for the confusion Cc: Richard Retanubun Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp1760-hcd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c index 564b03337e7e..795345ad45e6 100644 --- a/drivers/usb/host/isp1760-hcd.c +++ b/drivers/usb/host/isp1760-hcd.c @@ -1676,15 +1676,13 @@ static irqreturn_t isp1760_irq(struct usb_hcd *hcd) if (unlikely(!imask)) goto leave; + reg_write32(hcd->regs, HC_INTERRUPT_REG, imask); if (imask & (HC_ATL_INT | HC_SOT_INT)) do_atl_int(hcd); if (imask & HC_INTL_INT) do_intl_int(hcd); - /* Clear interrupt mask on device after the work is done */ - reg_write32(hcd->regs, HC_INTERRUPT_REG, imask); - irqret = IRQ_HANDLED; leave: spin_unlock(&priv->lock); From 67954fe95705a8ff80335964bd7e621d13fbc499 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Apr 2011 15:21:52 -0700 Subject: [PATCH 125/200] memcg: fix mem_cgroup_rotate_reclaimable_page() commit 3f58a8294333 ("move memcg reclaimable page into tail of inactive list") added inline keyword twice in its prototype. CC arch/x86/kernel/asm-offsets.s In file included from include/linux/swap.h:8, from include/linux/suspend.h:4, from arch/x86/kernel/asm-offsets.c:12: include/linux/memcontrol.h:220: error: duplicate `inline' Signed-off-by: Eric Dumazet Reviewed-by: Minchan Kim Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5a5ce7055839..5e9840f50980 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -216,7 +216,7 @@ static inline void mem_cgroup_del_lru_list(struct page *page, int lru) return ; } -static inline inline void mem_cgroup_rotate_reclaimable_page(struct page *page) +static inline void mem_cgroup_rotate_reclaimable_page(struct page *page) { return ; } From 584208e6b4103d2cfb08a7889c9fa3540826e0d5 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Thu, 14 Apr 2011 15:21:53 -0700 Subject: [PATCH 126/200] mm: optimize pfn calculation in online_page() If CONFIG_FLATMEM is enabled pfn is calculated in online_page() more than once. It is possible to optimize that and use value established at beginning of that function. Signed-off-by: Daniel Kiper Acked-by: Dave Hansen Cc: KAMEZAWA Hiroyuki Cc: Wu Fengguang Cc: Mel Gorman Cc: Christoph Lameter Acked-by: David Rientjes Reviewed-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a2acaf820fe5..9ca1d604f7cd 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -375,7 +375,7 @@ void online_page(struct page *page) #endif #ifdef CONFIG_FLATMEM - max_mapnr = max(page_to_pfn(page), max_mapnr); + max_mapnr = max(pfn, max_mapnr); #endif ClearPageReserved(page); From c344180c9e77145a9e7eab0050169c68afae04b2 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 14 Apr 2011 15:21:55 -0700 Subject: [PATCH 127/200] drivers/rtc/rtc-mc13xxx.c: fix unterminated platform_device_id table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The platform_device_id table is supposed to be zero-terminated. Signed-off-by: Axel Lin Acked-by: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-mc13xxx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index c42006469559..c5ac03793e79 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -401,6 +401,7 @@ const struct platform_device_id mc13xxx_rtc_idtable[] = { }, { .name = "mc13892-rtc", }, + { } }; static struct platform_driver mc13xxx_rtc_driver = { From c340b1d640001c8c9ecff74f68fd90422ae2448a Mon Sep 17 00:00:00 2001 From: Timo Warns Date: Thu, 14 Apr 2011 15:21:56 -0700 Subject: [PATCH 128/200] fs/partitions/ldm.c: fix oops caused by corrupted partition table The kernel automatically evaluates partition tables of storage devices. The code for evaluating LDM partitions (in fs/partitions/ldm.c) contains a bug that causes a kernel oops on certain corrupted LDM partitions. A kernel subsystem seems to crash, because, after the oops, the kernel no longer recognizes newly connected storage devices. The patch validates the value of vblk_size. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Timo Warns Cc: Eugene Teo Cc: Harvey Harrison Cc: Richard Russon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/ldm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index b10e3540d5b7..ce4f62440425 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) BUG_ON (!data || !frags); + if (size < 2 * VBLK_SIZE_HEAD) { + ldm_error("Value of size is to small."); + return false; + } + group = get_unaligned_be32(data + 0x08); rec = get_unaligned_be16(data + 0x0C); num = get_unaligned_be16(data + 0x0E); @@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) ldm_error ("A VBLK claims to have %d parts.", num); return false; } + if (rec >= num) { + ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num); + return false; + } list_for_each (item, frags) { f = list_entry (item, struct frag, list); @@ -1334,10 +1343,9 @@ found: f->map |= (1 << rec); - if (num > 0) { - data += VBLK_SIZE_HEAD; - size -= VBLK_SIZE_HEAD; - } + data += VBLK_SIZE_HEAD; + size -= VBLK_SIZE_HEAD; + memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); return true; From 9f6ae448bfc6cdf40279f43bb0b4fd159edc4e0a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 14 Apr 2011 15:21:57 -0700 Subject: [PATCH 129/200] mm/page_alloc.c: silence build_all_zonelists() section mismatch The memory hotplug case involves calling to build_all_zonelists() which in turns calls in to setup_zone_pageset(). The latter is marked __meminit while build_all_zonelists() itself has no particular annotation. build_all_zonelists() is only handed a non-NULL pointer in the case of memory hotplug through an existing __meminit path, so the setup_zone_pageset() reference is always safe. The options as such are either to flag build_all_zonelists() as __ref (as per __build_all_zonelists()), or to simply discard the __meminit annotation from setup_zone_pageset(). Signed-off-by: Paul Mundt Acked-by: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2747f5e5abc1..9f8a97b9a350 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3176,7 +3176,7 @@ static __init_refok int __build_all_zonelists(void *data) * Called with zonelists_mutex held always * unless system_state == SYSTEM_BOOTING. */ -void build_all_zonelists(void *data) +void __ref build_all_zonelists(void *data) { set_zonelist_order(); From d3bc2367180f7ee6afe4ee6e886bfba3ad4eb290 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 14 Apr 2011 15:21:58 -0700 Subject: [PATCH 130/200] vmstat: update comment regarding stat_threshold Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 772b39b87d95..8cb0f0a703e5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -321,9 +321,12 @@ static inline void mod_state(struct zone *zone, /* * The fetching of the stat_threshold is racy. We may apply * a counter threshold to the wrong the cpu if we get - * rescheduled while executing here. However, the following - * will apply the threshold again and therefore bring the - * counter under the threshold. + * rescheduled while executing here. However, the next + * counter update will apply the threshold again and + * therefore bring the counter under the threshold again. + * + * Most of the time the thresholds are the same anyways + * for all cpus in a zone. */ t = this_cpu_read(pcp->stat_threshold); From 592ce316395abc6b4e96c1ac198e5f347bb5d578 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 14 Apr 2011 15:21:59 -0700 Subject: [PATCH 131/200] leds/leds-regulator.c: fix handling of already enabled regulators Make the driver aware of the initial status of the regulator. The leds-regulator driver was ignoring the initial status of the regulator; this resulted in rdev->use_count being incremented to 2 after calling regulator_led_set_value() in the .probe method when a regulator was already enabled at insmod time, which made it impossible to ever disable the regulator. Signed-off-by: Antonio Ospite Cc: Richard Purdie Cc: Antonio Ospite Acked-by: Mark Brown Cc: Liam Girdwood Cc: Daniel Ribeiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/leds-regulator.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c index 3790816643be..8497f56f8e46 100644 --- a/drivers/leds/leds-regulator.c +++ b/drivers/leds/leds-regulator.c @@ -178,6 +178,10 @@ static int __devinit regulator_led_probe(struct platform_device *pdev) led->cdev.flags |= LED_CORE_SUSPENDRESUME; led->vcc = vcc; + /* to handle correctly an already enabled regulator */ + if (regulator_is_enabled(led->vcc)) + led->enabled = 1; + mutex_init(&led->mutex); INIT_WORK(&led->work, led_work); From 01eda2e0c0cf035308308a19581e4979285b51ec Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Apr 2011 15:22:00 -0700 Subject: [PATCH 132/200] kstrtox: fix compile warnings in test Fix the following warnings: CC [M] lib/test-kstrtox.o lib/test-kstrtox.c: In function 'test_kstrtou64_ok': lib/test-kstrtox.c:318: warning: this decimal constant is unsigned only in ISO C90 ... Signed-off-by: Alexey Dobriyan Reported-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test-kstrtox.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index 325c2f9ecebd..d55769d63cb8 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c @@ -315,12 +315,12 @@ static void __init test_kstrtou64_ok(void) {"65537", 10, 65537}, {"2147483646", 10, 2147483646}, {"2147483647", 10, 2147483647}, - {"2147483648", 10, 2147483648}, - {"2147483649", 10, 2147483649}, - {"4294967294", 10, 4294967294}, - {"4294967295", 10, 4294967295}, - {"4294967296", 10, 4294967296}, - {"4294967297", 10, 4294967297}, + {"2147483648", 10, 2147483648ULL}, + {"2147483649", 10, 2147483649ULL}, + {"4294967294", 10, 4294967294ULL}, + {"4294967295", 10, 4294967295ULL}, + {"4294967296", 10, 4294967296ULL}, + {"4294967297", 10, 4294967297ULL}, {"9223372036854775806", 10, 9223372036854775806ULL}, {"9223372036854775807", 10, 9223372036854775807ULL}, {"9223372036854775808", 10, 9223372036854775808ULL}, @@ -369,12 +369,12 @@ static void __init test_kstrtos64_ok(void) {"65537", 10, 65537}, {"2147483646", 10, 2147483646}, {"2147483647", 10, 2147483647}, - {"2147483648", 10, 2147483648}, - {"2147483649", 10, 2147483649}, - {"4294967294", 10, 4294967294}, - {"4294967295", 10, 4294967295}, - {"4294967296", 10, 4294967296}, - {"4294967297", 10, 4294967297}, + {"2147483648", 10, 2147483648LL}, + {"2147483649", 10, 2147483649LL}, + {"4294967294", 10, 4294967294LL}, + {"4294967295", 10, 4294967295LL}, + {"4294967296", 10, 4294967296LL}, + {"4294967297", 10, 4294967297LL}, {"9223372036854775806", 10, 9223372036854775806LL}, {"9223372036854775807", 10, 9223372036854775807LL}, }; @@ -418,10 +418,10 @@ static void __init test_kstrtou32_ok(void) {"65537", 10, 65537}, {"2147483646", 10, 2147483646}, {"2147483647", 10, 2147483647}, - {"2147483648", 10, 2147483648}, - {"2147483649", 10, 2147483649}, - {"4294967294", 10, 4294967294}, - {"4294967295", 10, 4294967295}, + {"2147483648", 10, 2147483648U}, + {"2147483649", 10, 2147483649U}, + {"4294967294", 10, 4294967294U}, + {"4294967295", 10, 4294967295U}, }; TEST_OK(kstrtou32, u32, "%u", test_u32_ok); } From 78be959e38567f0e020848179a5d64d2b064391a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Apr 2011 15:22:02 -0700 Subject: [PATCH 133/200] kstrtox: simpler code in _kstrtoull() Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 05672e819f8c..a235f3cc471c 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -49,12 +49,9 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) val = *s - '0'; else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f') val = _tolower(*s) - 'a' + 10; - else if (*s == '\n') { - if (*(s + 1) == '\0') - break; - else - return -EINVAL; - } else + else if (*s == '\n' && *(s + 1) == '\0') + break; + else return -EINVAL; if (val >= base) From d69ac131384aa735192b63fd6e0abbe42dec68dc Mon Sep 17 00:00:00 2001 From: Alexander Clouter Date: Thu, 14 Apr 2011 15:22:02 -0700 Subject: [PATCH 134/200] MAINTAINERS: add ARM/ts78xx-setup platform maintainer Signed-off-by: Alexander Clouter Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 649600cb8ec9..555a7bf8c525 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -877,6 +877,13 @@ F: arch/arm/mach-mv78xx0/ F: arch/arm/mach-orion5x/ F: arch/arm/plat-orion/ +ARM/Orion SoC/Technologic Systems TS-78xx platform support +M: Alexander Clouter +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +W: http://www.digriz.org.uk/ts78xx/kernel +S: Maintained +F: arch/arm/mach-orion5x/ts78xx-* + ARM/MIOA701 MACHINE SUPPORT M: Robert Jarzmik L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) From 61bc02bb252d438a3bb12a236a141d222b35da7b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 14 Apr 2011 15:22:04 -0700 Subject: [PATCH 135/200] MAINTAINERS: update m68knommu patterns Commit 66d857b08b ("m68k: merge m68k and m68knommu arch directories") moved the files around. Signed-off-by: Joe Perches Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 555a7bf8c525..e36170bcaeee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6255,7 +6255,8 @@ M: Greg Ungerer W: http://www.uclinux.org/ L: uclinux-dev@uclinux.org (subscribers-only) S: Maintained -F: arch/m68knommu/ +F: arch/m68k/*/*_no.* +F: arch/m68k/include/asm/*_no.* UCLINUX FOR RENESAS H8/300 (H8300) M: Yoshinori Sato From c897401bac2b099dd2ff673a9afe7193723d253c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 14 Apr 2011 15:22:05 -0700 Subject: [PATCH 136/200] MAINTAINERS: update various tty patterns Commits 4a6514e6d0 ("tty: move obsolete and broken tty drivers to drivers/staging/tty/") and a6afd9f3e8 ("tty: move a number of tty drivers from drivers/char/ to drivers/tty/") moved files around. Update patterns and orphan some files that were moved to staging. Signed-off-by: Joe Perches Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e36170bcaeee..e205f6f8eb98 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -184,10 +184,9 @@ F: Documentation/filesystems/9p.txt F: fs/9p/ A2232 SERIAL BOARD DRIVER -M: Enver Haase L: linux-m68k@lists.linux-m68k.org -S: Maintained -F: drivers/char/ser_a2232* +S: Orphan +F: drivers/staging/generic_serial/ser_a2232* AACRAID SCSI RAID DRIVER M: Adaptec OEM Raid Solutions @@ -1830,11 +1829,10 @@ S: Maintained F: drivers/platform/x86/compal-laptop.c COMPUTONE INTELLIPORT MULTIPORT CARD -M: "Michael H. Warfield" W: http://www.wittsend.com/computone.html -S: Maintained +S: Orphan F: Documentation/serial/computone.txt -F: drivers/char/ip2/ +F: drivers/staging/tty/ip2/ CONEXANT ACCESSRUNNER USB DRIVER M: Simon Arlott @@ -2017,7 +2015,7 @@ F: drivers/net/wan/cycx* CYCLADES ASYNC MUX DRIVER W: http://www.cyclades.com/ S: Orphan -F: drivers/char/cyclades.c +F: drivers/tty/cyclades.c F: include/linux/cyclades.h CYCLADES PC300 DRIVER @@ -2131,8 +2129,8 @@ L: Eng.Linux@digi.com W: http://www.digi.com S: Orphan F: Documentation/serial/digiepca.txt -F: drivers/char/epca* -F: drivers/char/digi* +F: drivers/staging/tty/epca* +F: drivers/staging/tty/digi* DIOLAN U2C-12 I2C DRIVER M: Guenter Roeck @@ -4199,7 +4197,7 @@ MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD M: Jiri Slaby S: Maintained F: Documentation/serial/moxa-smartio -F: drivers/char/mxser.* +F: drivers/tty/mxser.* MSI LAPTOP SUPPORT M: "Lee, Chun-Yi" @@ -4241,7 +4239,7 @@ F: sound/oss/msnd* MULTITECH MULTIPORT CARD (ISICOM) S: Orphan -F: drivers/char/isicom.c +F: drivers/tty/isicom.c F: include/linux/isicom.h MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER @@ -5280,14 +5278,14 @@ F: drivers/memstick/host/r592.* RISCOM8 DRIVER S: Orphan F: Documentation/serial/riscom8.txt -F: drivers/char/riscom8* +F: drivers/staging/tty/riscom8* ROCKETPORT DRIVER P: Comtrol Corp. W: http://www.comtrol.com S: Maintained F: Documentation/serial/rocket.txt -F: drivers/char/rocket* +F: drivers/tty/rocket* ROSE NETWORK LAYER M: Ralf Baechle @@ -5923,10 +5921,9 @@ F: arch/arm/mach-spear6xx/spear600.c F: arch/arm/mach-spear6xx/spear600_evb.c SPECIALIX IO8+ MULTIPORT SERIAL CARD DRIVER -M: Roger Wolff -S: Supported +S: Orphan F: Documentation/serial/specialix.txt -F: drivers/char/specialix* +F: drivers/staging/tty/specialix* SPI SUBSYSTEM M: David Brownell From 81ab4201fb7d91d6b0cd9ad5b4b16776e4bed145 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Apr 2011 15:22:06 -0700 Subject: [PATCH 137/200] mm: add VM counters for transparent hugepages I found it difficult to make sense of transparent huge pages without having any counters for its actions. Add some counters to vmstat for allocation of transparent hugepages and fallback to smaller pages. Optional patch, but useful for development and understanding the system. Contains improvements from Andrea Arcangeli and Johannes Weiner [akpm@linux-foundation.org: coding-style fixes] [hannes@cmpxchg.org: fix vmstat_text[] entries] Signed-off-by: Andi Kleen Acked-by: Andrea Arcangeli Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 7 +++++++ mm/huge_memory.c | 25 +++++++++++++++++++++---- mm/vmstat.c | 9 +++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 461c0119664f..2b3831b58aa4 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -58,6 +58,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ UNEVICTABLE_MLOCKFREED, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + THP_FAULT_ALLOC, + THP_FAULT_FALLBACK, + THP_COLLAPSE_ALLOC, + THP_COLLAPSE_ALLOC_FAILED, + THP_SPLIT, +#endif NR_VM_EVENT_ITEMS }; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0a619e0e2e0b..1722683bde23 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -680,8 +680,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); goto out; + } + count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { put_page(page); goto out; @@ -909,11 +912,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, new_page = NULL; if (unlikely(!new_page)) { + count_vm_event(THP_FAULT_FALLBACK); ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); put_page(page); goto out; } + count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); @@ -1390,6 +1395,7 @@ int split_huge_page(struct page *page) BUG_ON(!PageSwapBacked(page)); __split_huge_page(page, anon_vma); + count_vm_event(THP_SPLIT); BUG_ON(PageCompound(page)); out_unlock: @@ -1784,9 +1790,11 @@ static void collapse_huge_page(struct mm_struct *mm, node, __GFP_OTHER_NODE); if (unlikely(!new_page)) { up_read(&mm->mmap_sem); + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); *hpage = ERR_PTR(-ENOMEM); return; } + count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { up_read(&mm->mmap_sem); put_page(new_page); @@ -2151,8 +2159,11 @@ static void khugepaged_do_scan(struct page **hpage) #ifndef CONFIG_NUMA if (!*hpage) { *hpage = alloc_hugepage(khugepaged_defrag()); - if (unlikely(!*hpage)) + if (unlikely(!*hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); break; + } + count_vm_event(THP_COLLAPSE_ALLOC); } #else if (IS_ERR(*hpage)) @@ -2192,8 +2203,11 @@ static struct page *khugepaged_alloc_hugepage(void) do { hpage = alloc_hugepage(khugepaged_defrag()); - if (!hpage) + if (!hpage) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); khugepaged_alloc_sleep(); + } else + count_vm_event(THP_COLLAPSE_ALLOC); } while (unlikely(!hpage) && likely(khugepaged_enabled())); return hpage; @@ -2210,8 +2224,11 @@ static void khugepaged_loop(void) while (likely(khugepaged_enabled())) { #ifndef CONFIG_NUMA hpage = khugepaged_alloc_hugepage(); - if (unlikely(!hpage)) + if (unlikely(!hpage)) { + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); break; + } + count_vm_event(THP_COLLAPSE_ALLOC); #else if (IS_ERR(hpage)) { khugepaged_alloc_sleep(); diff --git a/mm/vmstat.c b/mm/vmstat.c index 8cb0f0a703e5..897ea9e88238 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -948,7 +948,16 @@ static const char * const vmstat_text[] = { "unevictable_pgs_cleared", "unevictable_pgs_stranded", "unevictable_pgs_mlockfreed", + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + "thp_fault_alloc", + "thp_fault_fallback", + "thp_collapse_alloc", + "thp_collapse_alloc_failed", + "thp_split", #endif + +#endif /* CONFIG_VM_EVENTS_COUNTERS */ }; static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, From d00ebeac5f24f290636f7a895dafc124b2930a08 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 14 Apr 2011 15:22:07 -0700 Subject: [PATCH 138/200] MAINTAINERS: update STABLE BRANCH info Drop Chris Wright from STABLE maintainers. He hasn't done STABLE release work for quite some time. Signed-off-by: Randy Dunlap Acked-by: Chris Wright Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e205f6f8eb98..17c22c50c691 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5968,7 +5968,6 @@ F: arch/alpha/kernel/srm_env.c STABLE BRANCH M: Greg Kroah-Hartman -M: Chris Wright L: stable@kernel.org S: Maintained From fc5da22ae35d4720be59af8787a8a6d5e4da9517 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 14 Apr 2011 15:22:07 -0700 Subject: [PATCH 139/200] tmpfs: fix off-by-one in max_blocks checks If you fill up a tmpfs, df was showing tmpfs 460800 - - - /tmp because of an off-by-one in the max_blocks checks. Fix it so df shows tmpfs 460800 460800 0 100% /tmp Signed-off-by: Hugh Dickins Cc: Tim Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 58da7c150ba6..8fa27e4e582a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -421,7 +421,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long * a waste to allocate index if we cannot allocate data. */ if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0) + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks - 1) >= 0) return ERR_PTR(-ENOSPC); percpu_counter_inc(&sbinfo->used_blocks); spin_lock(&inode->i_lock); @@ -1397,7 +1398,8 @@ repeat: shmem_swp_unmap(entry); sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { - if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) || + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks) >= 0 || shmem_acct_block(info->flags)) { spin_unlock(&info->lock); error = -ENOSPC; From 5de1743e2434fcb24e3d944a20130029b8fe867a Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Thu, 14 Apr 2011 15:22:08 -0700 Subject: [PATCH 140/200] drivers/misc/sgi-gru/grufile.c: fix the wrong members of gru_chip Fix the wrong members and the wrong function's definition, since the irq_chip had changed. Signed-off-by: Wanlong Gao Cc: Jack Steiner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/sgi-gru/grufile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 20e4e9395b61..ecafa4ba238b 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -348,15 +348,15 @@ static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; -static void gru_noop(unsigned int irq) +static void gru_noop(struct irq_data *d) { } static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { [0 ... GRU_CHIPLETS_PER_BLADE - 1] { - .mask = gru_noop, - .unmask = gru_noop, - .ack = gru_noop + .irq_mask = gru_noop, + .irq_unmask = gru_noop, + .irq_ack = gru_noop } }; From 4471a675dfc7ca676c165079e91c712b09dc9ce4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 14 Apr 2011 15:22:09 -0700 Subject: [PATCH 141/200] brk: COMPAT_BRK: fix detection of randomized brk 5520e89 ("brk: fix min_brk lower bound computation for COMPAT_BRK") tried to get the whole logic of brk randomization for legacy (libc5-based) applications finally right. It turns out that the way to detect whether brk has actually been randomized in the end or not introduced by that patch still doesn't work for those binaries, as reported by Geert: : /sbin/init from my old m68k ramdisk exists prematurely. : : Before the patch: : : | brk(0x80005c8e) = 0x80006000 : : After the patch: : : | brk(0x80005c8e) = 0x80005c8e : : Old libc5 considers brk() to have failed if the return value is not : identical to the requested value. I don't like it, but currently see no better option than a bit flag in task_struct to catch the CONFIG_COMPAT_BRK && randomize_va_space == 2 case. Signed-off-by: Jiri Kosina Tested-by: Geert Uytterhoeven Reported-by: Geert Uytterhoeven Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 6 +++++- include/linux/sched.h | 3 +++ mm/mmap.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f34078d702d3..303983fabfd6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk - if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) + if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); +#ifdef CONFIG_COMPAT_BRK + current->brk_randomized = 1; +#endif + } #endif if (current->personality & MMAP_PAGE_ZERO) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..18d63cea2848 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1254,6 +1254,9 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; +#ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; +#endif #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/mm/mmap.c b/mm/mmap.c index 8c05e5b43b69..e27e0cf0de03 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) * randomize_va_space to 2, which will still cause mm->start_brk * to be arbitrarily shifted */ - if (mm->start_brk > PAGE_ALIGN(mm->end_data)) + if (current->brk_randomized) min_brk = mm->start_brk; else min_brk = mm->end_data; From fe936dfc23fed3475b11067e8d9b70553eafcd9e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Apr 2011 15:22:10 -0700 Subject: [PATCH 142/200] mm: check that we have the right vma in __access_remote_vm() In __access_remote_vm() we need to check that we have found the right vma, not the following vma before we try to access it. Otherwise we might call the vma's access routine with an address which does not fall inside the vma. It was discovered on a current kernel but with an unreleased driver, from memory it was strace leading to a kernel bad access, but it obviously depends on what the access implementation does. Looking at other access implementations I only see: $ git grep -A 5 vm_operations|grep access arch/powerpc/platforms/cell/spufs/file.c- .access = spufs_mem_mmap_access, arch/x86/pci/i386.c- .access = generic_access_phys, drivers/char/mem.c- .access = generic_access_phys fs/sysfs/bin.c- .access = bin_access, The spufs one looks like it might behave badly given the wrong vma, it assumes vma->vm_file->private_data is a spu_context, and looks like it would probably blow up pretty quickly if it wasn't. generic_access_phys() only uses the vma to check vm_flags and get the mm, and then walks page tables using the address. So it should bail on the vm_flags check, or at worst let you access some other VM_IO mapping. And bin_access() just proxies to another access implementation. Signed-off-by: Michael Ellerman Reviewed-by: KOSAKI Motohiro Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index b623a249918c..ce22a250926f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3688,7 +3688,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, */ #ifdef CONFIG_HAVE_IOREMAP_PROT vma = find_vma(mm, addr); - if (!vma) + if (!vma || vma->vm_start > addr) break; if (vma->vm_ops && vma->vm_ops->access) ret = vma->vm_ops->access(vma, addr, buf, From 929bea7c714220fc76ce3f75bef9056477c28e74 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 14 Apr 2011 15:22:12 -0700 Subject: [PATCH 143/200] vmscan: all_unreclaimable() use zone->all_unreclaimable as a name all_unreclaimable check in direct reclaim has been introduced at 2.6.19 by following commit. 2006 Sep 25; commit 408d8544; oom: use unreclaimable info And it went through strange history. firstly, following commit broke the logic unintentionally. 2008 Apr 29; commit a41f24ea; page allocator: smarter retry of costly-order allocations Two years later, I've found obvious meaningless code fragment and restored original intention by following commit. 2010 Jun 04; commit bb21c7ce; vmscan: fix do_try_to_free_pages() return value when priority==0 But, the logic didn't works when 32bit highmem system goes hibernation and Minchan slightly changed the algorithm and fixed it . 2010 Sep 22: commit d1908362: vmscan: check all_unreclaimable in direct reclaim path But, recently, Andrey Vagin found the new corner case. Look, struct zone { .. int all_unreclaimable; .. unsigned long pages_scanned; .. } zone->all_unreclaimable and zone->pages_scanned are neigher atomic variables nor protected by lock. Therefore zones can become a state of zone->page_scanned=0 and zone->all_unreclaimable=1. In this case, current all_unreclaimable() return false even though zone->all_unreclaimabe=1. This resulted in the kernel hanging up when executing a loop of the form 1. fork 2. mmap 3. touch memory 4. read memory 5. munmmap as described in http://www.gossamer-threads.com/lists/linux/kernel/1348725#1348725 Is this ignorable minor issue? No. Unfortunately, x86 has very small dma zone and it become zone->all_unreclamble=1 easily. and if it become all_unreclaimable=1, it never restore all_unreclaimable=0. Why? if all_unreclaimable=1, vmscan only try DEF_PRIORITY reclaim and a-few-lru-pages>>DEF_PRIORITY always makes 0. that mean no page scan at all! Eventually, oom-killer never works on such systems. That said, we can't use zone->pages_scanned for this purpose. This patch restore all_unreclaimable() use zone->all_unreclaimable as old. and in addition, to add oom_killer_disabled check to avoid reintroduce the issue of commit d1908362 ("vmscan: check all_unreclaimable in direct reclaim path"). Reported-by: Andrey Vagin Signed-off-by: KOSAKI Motohiro Cc: Nick Piggin Reviewed-by: Minchan Kim Reviewed-by: KAMEZAWA Hiroyuki Acked-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index c7f5a6d4b75b..f6b435c80079 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1988,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone) return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; } -/* - * As hibernation is going on, kswapd is freezed so that it can't mark - * the zone into all_unreclaimable. It can't handle OOM during hibernation. - * So let's check zone's unreclaimable in direct reclaim as well as kswapd. - */ +/* All zones in zonelist are unreclaimable? */ static bool all_unreclaimable(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; - bool all_unreclaimable = true; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -2006,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist, continue; if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; - if (zone_reclaimable(zone)) { - all_unreclaimable = false; - break; - } + if (!zone->all_unreclaimable) + return false; } - return all_unreclaimable; + return true; } /* @@ -2108,6 +2102,14 @@ out: if (sc->nr_reclaimed) return sc->nr_reclaimed; + /* + * As hibernation is going on, kswapd is freezed so that it can't mark + * the zone into all_unreclaimable. Thus bypassing all_unreclaimable + * check. + */ + if (oom_killer_disabled) + return 0; + /* top priority shrink_zones still had more to do? don't OOM, then */ if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) return 1; From 341aea2bc48bf652777fb015cc2b3dfa9a451817 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 14 Apr 2011 15:22:13 -0700 Subject: [PATCH 144/200] oom-kill: remove boost_dying_task_prio() This is an almost-revert of commit 93b43fa ("oom: give the dying task a higher priority"). That commit dramatically improved oom killer logic when a fork-bomb occurs. But I've found that it has nasty corner case. Now cpu cgroup has strange default RT runtime. It's 0! That said, if a process under cpu cgroup promote RT scheduling class, the process never run at all. If an admin inserts a !RT process into a cpu cgroup by setting rtruntime=0, usually it runs perfectly because a !RT task isn't affected by the rtruntime knob. But if it promotes an RT task via an explicit setscheduler() syscall or an OOM, the task can't run at all. In short, the oom killer doesn't work at all if admins are using cpu cgroup and don't touch the rtruntime knob. Eventually, kernel may hang up when oom kill occur. I and the original author Luis agreed to disable this logic. Signed-off-by: KOSAKI Motohiro Acked-by: Luis Claudio R. Goncalves Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Acked-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6a819d1b2c7d..83fb72c108b7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -83,24 +83,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk, } #endif /* CONFIG_NUMA */ -/* - * If this is a system OOM (not a memcg OOM) and the task selected to be - * killed is not already running at high (RT) priorities, speed up the - * recovery by boosting the dying task to the lowest FIFO priority. - * That helps with the recovery and avoids interfering with RT tasks. - */ -static void boost_dying_task_prio(struct task_struct *p, - struct mem_cgroup *mem) -{ - struct sched_param param = { .sched_priority = 1 }; - - if (mem) - return; - - if (!rt_task(p)) - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); -} - /* * The process p may have detached its own ->mm while exiting or through * use_mm(), but one or more of its subthreads may still have a valid @@ -452,13 +434,6 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) set_tsk_thread_flag(p, TIF_MEMDIE); force_sig(SIGKILL, p); - /* - * We give our sacrificial lamb high priority and access to - * all the memory it needs. That way it should be able to - * exit() and clear out its resources quickly... - */ - boost_dying_task_prio(p, mem); - return 0; } #undef K @@ -482,7 +457,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, */ if (p->flags & PF_EXITING) { set_tsk_thread_flag(p, TIF_MEMDIE); - boost_dying_task_prio(p, mem); return 0; } @@ -556,7 +530,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) */ if (fatal_signal_pending(current)) { set_thread_flag(TIF_MEMDIE); - boost_dying_task_prio(current, NULL); return; } @@ -712,7 +685,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, */ if (fatal_signal_pending(current)) { set_thread_flag(TIF_MEMDIE); - boost_dying_task_prio(current, NULL); return; } From 13209c2a52afa691ca19e7e6ebd22d4034bdfeed Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 14 Apr 2011 15:22:14 -0700 Subject: [PATCH 145/200] RapidIO: add IDT CPS-1432 switch definitions Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/idt_gen2.c | 1 + include/linux/rio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 095016a9dec1..ac2701b22e71 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -418,3 +418,4 @@ DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTVPS1616, idtg2_switch_init); DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTSPS1616, idtg2_switch_init); +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1432, idtg2_switch_init); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 7410d3365e2a..0cee0152aca9 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -35,6 +35,7 @@ #define RIO_DID_IDTCPS6Q 0x035f #define RIO_DID_IDTCPS10Q 0x035e #define RIO_DID_IDTCPS1848 0x0374 +#define RIO_DID_IDTCPS1432 0x0375 #define RIO_DID_IDTCPS1616 0x0379 #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 From 59f9996555542f901f2d01ccab5c0612c8c5c480 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 14 Apr 2011 15:22:14 -0700 Subject: [PATCH 146/200] RapidIO/mpc85xx: fix possible mport registration problems Fix a possible problem with mport registration left non-cleared after fsl_rio_setup() exits on link error. Abort mport initialization if registration failed. This patch is applicable to 2.6.39-rc1 only. The problem does not exist for earlier versions. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 4 +++- drivers/rapidio/rio.c | 5 +++-- include/linux/rio.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 14232d57369c..49798532b477 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1457,7 +1457,6 @@ int fsl_rio_setup(struct platform_device *dev) port->ops = ops; port->priv = priv; port->phys_efptr = 0x100; - rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); rio_regs_win = priv->regs_win; @@ -1504,6 +1503,9 @@ int fsl_rio_setup(struct platform_device *dev) dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", port->sys_size ? 65536 : 256); + if (rio_register_mport(port)) + goto err; + if (port->host_deviceid >= 0) out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED); diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index c29719cacbca..86c9a091a2ff 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1171,16 +1171,17 @@ static int rio_hdid_setup(char *str) __setup("riohdid=", rio_hdid_setup); -void rio_register_mport(struct rio_mport *port) +int rio_register_mport(struct rio_mport *port) { if (next_portid >= RIO_MAX_MPORTS) { pr_err("RIO: reached specified max number of mports\n"); - return; + return 1; } port->id = next_portid++; port->host_deviceid = rio_get_hdid(port->id); list_add_tail(&port->node, &rio_mports); + return 0; } EXPORT_SYMBOL_GPL(rio_local_get_device_id); diff --git a/include/linux/rio.h b/include/linux/rio.h index 4e37a7cfa726..4d50611112ba 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -396,7 +396,7 @@ union rio_pw_msg { }; /* Architecture and hardware-specific functions */ -extern void rio_register_mport(struct rio_mport *); +extern int rio_register_mport(struct rio_mport *); extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_inb_mbox(struct rio_mport *, int); extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int); From 6a534c9d265ebabca4c3ae6f8712fc5a27cd3999 Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Thu, 14 Apr 2011 15:22:16 -0700 Subject: [PATCH 147/200] MAINTAINERS: change mail adress of Hans J. Koch My old mail address doesn't exist anymore. This patch changes all occurences in MAINTAINERS to my new address. Signed-off-by: Hans J. Koch Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 17c22c50c691..ec3600306289 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1069,7 +1069,7 @@ F: arch/arm/mach-shmobile/ F: drivers/sh/ ARM/TELECHIPS ARM ARCHITECTURE -M: "Hans J. Koch" +M: "Hans J. Koch" L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/plat-tcc/ @@ -4082,7 +4082,7 @@ F: drivers/video/matrox/matroxfb_* F: include/linux/matroxfb.h MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER -M: "Hans J. Koch" +M: "Hans J. Koch" L: lm-sensors@lm-sensors.org S: Maintained F: Documentation/hwmon/max6650 @@ -6622,7 +6622,7 @@ F: fs/hostfs/ F: fs/hppfs/ USERSPACE I/O (UIO) -M: "Hans J. Koch" +M: "Hans J. Koch" M: Greg Kroah-Hartman S: Maintained F: Documentation/DocBook/uio-howto.tmpl From ed5afeaf422202485bbebc7e911f13b2a6be2666 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 14 Apr 2011 15:22:16 -0700 Subject: [PATCH 148/200] fs/fhandle.c: add for ia64 force_o_largefile() on ia64 is defined in and requires . Signed-off-by: Jeff Mahoney Cc: Aneesh Kumar K.V Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fhandle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fhandle.c b/fs/fhandle.c index bf93ad2bee07..6b088641f5bf 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "internal.h" From 6d56dad3ae070511e59792a62f27b2394cc936bc Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 14 Apr 2011 15:22:17 -0700 Subject: [PATCH 149/200] um: fix call tracer and bug handler Commit 1de1502c ("x86, um: now we can get rid of trivial uml headers") removed accidentally bug.h which broke UML's call tracer and bug handler. Without asm-generic/bug.h UML uses BUG() from arch/x86/ which makes use of ud2. UML cannot use ud2, it raises SIGILL in user mode. As UML has a different stack for handling signals the call trace will be cut off. Signed-off-by: Richard Weinberger Reported-by: Sergei Trofimovich Tested-by: Sergei Trofimovich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/asm/bug.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 arch/um/include/asm/bug.h diff --git a/arch/um/include/asm/bug.h b/arch/um/include/asm/bug.h new file mode 100644 index 000000000000..9e33b864c359 --- /dev/null +++ b/arch/um/include/asm/bug.h @@ -0,0 +1,6 @@ +#ifndef __UM_BUG_H +#define __UM_BUG_H + +#include + +#endif From 084189a88754d40e1bb9bfbc278e70c33e571685 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 14 Apr 2011 15:22:18 -0700 Subject: [PATCH 150/200] um: disable CONFIG_CMPXCHG_LOCAL Commit 8a5ec0ba "Lockless (and preemptless) fastpaths for slub" makes use of this_cpu_cmpxchg_double() which needs this_cpu_cmpxchg16b_emu() on x86_64. Implementing cmpxchg16b emulation for UML would introduce too much complexity. So just disable it. Signed-off-by: Richard Weinberger Reported-by: Sergei Trofimovich Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig.x86 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86 index 02fb017fed47..a9da516a5274 100644 --- a/arch/um/Kconfig.x86 +++ b/arch/um/Kconfig.x86 @@ -4,6 +4,10 @@ menu "UML-specific options" menu "Host processor type and features" +config CMPXCHG_LOCAL + bool + default n + source "arch/x86/Kconfig.cpu" endmenu From b836aec53e2bce71de1d5415313380688c851477 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Thu, 14 Apr 2011 15:22:20 -0700 Subject: [PATCH 151/200] ramfs: fix memleak on no-mmu arch On no-mmu arch, there is a memleak during shmem test. The cause of this memleak is ramfs_nommu_expand_for_mapping() added page refcount to 2 which makes iput() can't free that pages. The simple test file is like this: int main(void) { int i; key_t k = ftok("/etc", 42); for ( i=0; i<100; ++i) { int id = shmget(k, 10000, 0644|IPC_CREAT); if (id == -1) { printf("shmget error\n"); } if(shmctl(id, IPC_RMID, NULL ) == -1) { printf("shm rm error\n"); return -1; } } printf("run ok...\n"); return 0; } And the result: root:/> free total used free shared buffers Mem: 60320 17912 42408 0 0 -/+ buffers: 17912 42408 root:/> shmem run ok... root:/> free total used free shared buffers Mem: 60320 19096 41224 0 0 -/+ buffers: 19096 41224 root:/> shmem run ok... root:/> free total used free shared buffers Mem: 60320 20296 40024 0 0 -/+ buffers: 20296 40024 ... After this patch the test result is:(no memleak anymore) root:/> free total used free shared buffers Mem: 60320 16668 43652 0 0 -/+ buffers: 16668 43652 root:/> shmem run ok... root:/> free total used free shared buffers Mem: 60320 16668 43652 0 0 -/+ buffers: 16668 43652 Signed-off-by: Bob Liu Acked-by: Hugh Dickins Signed-off-by: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-nommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 9eead2c796b7..fbb0b478a346 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) SetPageDirty(page); unlock_page(page); + put_page(page); } return 0; From e27e6151b154ff6e5e8162efa291bc60196d29ea Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 14 Apr 2011 15:22:21 -0700 Subject: [PATCH 152/200] mm/thp: use conventional format for boolean attributes The conventional format for boolean attributes in sysfs is numeric ("0" or "1" followed by new-line). Any boolean attribute can then be read and written using a generic function. Using the strings "yes [no]", "[yes] no" (read), "yes" and "no" (write) will frustrate this. [akpm@linux-foundation.org: use kstrtoul()] [akpm@linux-foundation.org: test_bit() doesn't return 1/0, per Neil] Signed-off-by: Ben Hutchings Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Johannes Weiner Cc: Rik van Riel Cc: Hugh Dickins Tested-by: David Rientjes Cc: NeilBrown Cc: [2.6.38.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1722683bde23..470dcda10add 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -244,25 +244,29 @@ static ssize_t single_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag) { - if (test_bit(flag, &transparent_hugepage_flags)) - return sprintf(buf, "[yes] no\n"); - else - return sprintf(buf, "yes [no]\n"); + return sprintf(buf, "%d\n", + !!test_bit(flag, &transparent_hugepage_flags)); } + static ssize_t single_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag flag) { - if (!memcmp("yes", buf, - min(sizeof("yes")-1, count))) { - set_bit(flag, &transparent_hugepage_flags); - } else if (!memcmp("no", buf, - min(sizeof("no")-1, count))) { - clear_bit(flag, &transparent_hugepage_flags); - } else + unsigned long value; + int ret; + + ret = kstrtoul(buf, 10, &value); + if (ret < 0) + return ret; + if (value > 1) return -EINVAL; + if (value) + set_bit(flag, &transparent_hugepage_flags); + else + clear_bit(flag, &transparent_hugepage_flags); + return count; } From 88b996cd0652280cc9b9fc70008fda15f14175e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Apr 2011 15:20:10 +0200 Subject: [PATCH 153/200] block: cleanup the block plug helper functions It's a bit of a mess currently. task->plug is being cleared and reset in __blk_finish_plug(), and blk_finish_plug() is testing for a NULL plug which cannot happen even from schedule() anymore since it uses blk_needs_flush_plug() to determine whether to call into this function at all. So get rid of some of the cruft. Signed-off-by: Jens Axboe --- block/blk-core.c | 24 ++++++------------------ include/linux/blkdev.h | 6 +++--- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 36b1a7559f94..b598fa7720d4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2671,7 +2671,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth) q->unplugged_fn(q); } -static void flush_plug_list(struct blk_plug *plug) +void blk_flush_plug_list(struct blk_plug *plug) { struct request_queue *q; unsigned long flags; @@ -2733,29 +2733,17 @@ static void flush_plug_list(struct blk_plug *plug) local_irq_restore(flags); } - -static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - flush_plug_list(plug); - - if (plug == tsk->plug) - tsk->plug = NULL; -} +EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - if (plug) - __blk_finish_plug(current, plug); + blk_flush_plug_list(plug); + + if (plug == current->plug) + current->plug = NULL; } EXPORT_SYMBOL(blk_finish_plug); -void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug) -{ - __blk_finish_plug(tsk, plug); - tsk->plug = plug; -} -EXPORT_SYMBOL(__blk_flush_plug); - int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c07ffafac5d4..ffe48ff318f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - if (unlikely(plug)) - __blk_flush_plug(tsk, plug); + if (plug) + blk_flush_plug_list(plug); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) From f6603783f9f099bf7a83b3f6c689bbbf74f0e96e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Apr 2011 15:49:07 +0200 Subject: [PATCH 154/200] block: only force kblockd unplugging from the schedule() path For the explicit unplugging, we'd prefer to kick things off immediately and not pay the penalty of the latency to switch to kblockd. So let blk_finish_plug() do the run inline, while the implicit-on-schedule-out unplug will punt to kblockd. Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++------ include/linux/blkdev.h | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index b598fa7720d4..3c8121072507 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2662,16 +2662,17 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } -static void queue_unplugged(struct request_queue *q, unsigned int depth) +static void queue_unplugged(struct request_queue *q, unsigned int depth, + bool force_kblockd) { trace_block_unplug_io(q, depth); - __blk_run_queue(q, true); + __blk_run_queue(q, force_kblockd); if (q->unplugged_fn) q->unplugged_fn(q); } -void blk_flush_plug_list(struct blk_plug *plug) +void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) { struct request_queue *q; unsigned long flags; @@ -2706,7 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - queue_unplugged(q, depth); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } q = rq->q; @@ -2727,7 +2728,7 @@ void blk_flush_plug_list(struct blk_plug *plug) } if (q) { - queue_unplugged(q, depth); + queue_unplugged(q, depth, force_kblockd); spin_unlock(q->queue_lock); } @@ -2737,7 +2738,7 @@ EXPORT_SYMBOL(blk_flush_plug_list); void blk_finish_plug(struct blk_plug *plug) { - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, false); if (plug == current->plug) current->plug = NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ffe48ff318f9..1c76506fcf11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *, bool); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; if (plug) - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, true); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) From 7ebfa57f6d307b66bb88600145afccde31016ab5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 15 Apr 2011 07:34:26 -0700 Subject: [PATCH 155/200] vfs: fix incorrect dentry_update_name_case() BUG_ON() test The case we should be verifying when updating the dentry name is that the _parent_ inode (the directory) semaphore is held, not the semaphore for the dentry itself. It's the directory locking that rename and readdir() etc all care about. The comment just above even says so - but then the BUG_ON() still checked the dentry inode itself. Very few people noticed, because this helper function really isn't used for very much, so you had to be using ncpfs to ever hit it. I think I should just remove the BUG_ON (the function really has just one user), but let's run with it fixed for a while before getting rid of it entirely. Reported-and-tested-by: Bongani Hlope Reported-and-tested-by: Bernd Feige Cc: Petr Vandrovec , Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Nick Piggin Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index ad25c4cec7d5..129a35730994 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2131,7 +2131,7 @@ EXPORT_SYMBOL(d_rehash); */ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) { - BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex)); BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ spin_lock(&dentry->d_lock); From 0cd9c6494ee5c19aef085152bc37f3a4e774a9e1 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Thu, 14 Apr 2011 15:41:57 -0700 Subject: [PATCH 156/200] futex: Set FLAGS_HAS_TIMEOUT during futex_wait restart setup The FLAGS_HAS_TIMEOUT flag was not getting set, causing the restart_block to restart futex_wait() without a timeout after a signal. Commit b41277dc7a18ee332d in 2.6.38 introduced the regression by accidentally removing the the FLAGS_HAS_TIMEOUT assignment from futex_wait() during the setup of the restart block. Restore the originaly behavior. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=32922 Reported-by: Tim Smith Reported-by: Torsten Hilbrich Signed-off-by: Darren Hart Signed-off-by: Eric Dumazet Cc: Peter Zijlstra Cc: John Kacur Cc: stable@kernel.org Link: http://lkml.kernel.org/r/%3Cdaac0eb3af607f72b9a4d3126b2ba8fb5ed3b883.1302820917.git.dvhart%40linux.intel.com%3E Signed-off-by: Thomas Gleixner --- kernel/futex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index dfb924ffe65b..fe28dc282eae 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1886,7 +1886,7 @@ retry: restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; - restart->futex.flags = flags; + restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; ret = -ERESTART_RESTARTBLOCK; From 7d6b46707f2491a94f4bd3b4329d2d7f809e9368 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 15 Apr 2011 20:39:01 +0900 Subject: [PATCH 157/200] x86, NUMA: Fix fakenuma boot failure Currently, numa=fake boot parameter is broken. If it's used, kernel may panic due to devide by zero error depending on CPU configuration Call Trace: [] find_busiest_group+0x38c/0xd30 [] ? local_clock+0x6f/0x80 [] load_balance+0xa3/0x600 [] idle_balance+0xf3/0x180 [] schedule+0x722/0x7d0 [] ? wait_for_common+0x128/0x190 [] schedule_timeout+0x265/0x320 [] ? lock_release_holdtime+0x35/0x1a0 [] ? wait_for_common+0x128/0x190 [] ? __lock_release+0x9c/0x1d0 [] ? _raw_spin_unlock_irq+0x30/0x40 [] ? _raw_spin_unlock_irq+0x30/0x40 [] wait_for_common+0x130/0x190 [] ? try_to_wake_up+0x510/0x510 [] wait_for_completion+0x1d/0x20 [] kthread_create_on_node+0xac/0x150 [] ? process_scheduled_works+0x40/0x40 [] ? wait_for_common+0x4f/0x190 [] __alloc_workqueue_key+0x1a3/0x590 [] cpuset_init_smp+0x6b/0x7b [] kernel_init+0xc3/0x182 [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? start_kernel+0x400/0x400 [] ? gs_change+0x13/0x13 The divede by zero is caused by the following line, group->cpu_power==0: kernel/sched_fair.c::update_sg_lb_stats() /* Adjust by relative CPU power of the group */ sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; This regression was caused by commit e23bba6044 ("x86-64, NUMA: Unify emulated distance mapping") because it changes cpu -> node mapping in the process of dropping fake_physnodes(). old) all cpus are assinged node 0 now) cpus are assigned round robin (the logic is implemented by numa_init_array()) Note: The change in behavior only happens if the system doesn't have neither ACPI SRAT table nor AMD northbridge NUMA information. Round robin assignment doesn't work because init_numa_sched_groups_power() assumes all logical cpus in the same physical cpu share the same node (then it only accounts for group_first_cpu()), and the simple round robin breaks the above assumption. Thus, this patch implements a reassignment of node-ids if buggy firmware or numa emulation makes wrong cpu node map. Tt enforce all logical cpus in the same physical cpu share the same node. Signed-off-by: KOSAKI Motohiro Acked-by: Tejun Heo Cc: Yinghai Lu Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Shaohui Zheng Cc: David Rientjes Cc: H. Peter Anvin Link: http://lkml.kernel.org/r/20110415203928.1303.A69D9226@jp.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2871d3c71b6..8ed8908cc9f7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -312,6 +312,26 @@ void __cpuinit smp_store_cpu_info(int id) identify_secondary_cpu(c); } +static void __cpuinit check_cpu_siblings_on_same_node(int cpu1, int cpu2) +{ + int node1 = early_cpu_to_node(cpu1); + int node2 = early_cpu_to_node(cpu2); + + /* + * Our CPU scheduler assumes all logical cpus in the same physical cpu + * share the same node. But, buggy ACPI or NUMA emulation might assign + * them to different node. Fix it. + */ + if (node1 != node2) { + pr_warning("CPU %d in node %d and CPU %d in node %d are in the same physical CPU. forcing same node %d\n", + cpu1, node1, cpu2, node2, node2); + + numa_remove_cpu(cpu1); + numa_set_node(cpu1, node2); + numa_add_cpu(cpu1); + } +} + static void __cpuinit link_thread_siblings(int cpu1, int cpu2) { cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); @@ -320,6 +340,7 @@ static void __cpuinit link_thread_siblings(int cpu1, int cpu2) cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); + check_cpu_siblings_on_same_node(cpu1, cpu2); } @@ -361,10 +382,12 @@ void __cpuinit set_cpu_sibling_map(int cpu) per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); + check_cpu_siblings_on_same_node(cpu, i); } if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpumask_set_cpu(i, cpu_core_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(i)); + check_cpu_siblings_on_same_node(cpu, i); /* * Does this new cpu bringup a new core? */ From 0e4f8f888845f9dca540ad175884244e5db5eea2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Apr 2011 16:05:44 -0400 Subject: [PATCH 158/200] Btrfs: don't force chunk allocation in find_free_extent find_free_extent likes to allocate in contiguous clusters, which makes writeback faster, especially on SSD storage. As the FS fragments, these clusters become harder to find and we have to decide between allocating a new chunk to make more clusters or giving up on the cluster to allocate from the free space we have. Right now it creates too many chunks, and you can end up with a whole FS that is mostly empty metadata chunks. This commit changes the allocation code to be more strict and only allocate new chunks when we've made good use of the chunks we already have. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 95 ++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f619c3cb13b7..26479484180d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,25 @@ #include "locking.h" #include "free-space-cache.h" +/* control flags for do_chunk_alloc's force field + * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk + * if we really need one. + * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * + * CHUNK_ALLOC_LIMITED means to only try and allocate one + * if we have very few chunks already allocated. This is + * used as part of the clustering code to help make sure + * we have a good pool of storage to cluster in, without + * filling the FS with empty chunks + * + */ +enum { + CHUNK_ALLOC_NO_FORCE = 0, + CHUNK_ALLOC_FORCE = 1, + CHUNK_ALLOC_LIMITED = 2, +}; + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -3019,7 +3038,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_readonly = 0; found->bytes_may_use = 0; found->full = 0; - found->force_alloc = 0; + found->force_alloc = CHUNK_ALLOC_NO_FORCE; *space_info = found; list_add_rcu(&found->list, &info->space_info); atomic_set(&found->caching_threads, 0); @@ -3150,7 +3169,7 @@ again: if (!data_sinfo->full && alloc_chunk) { u64 alloc_target; - data_sinfo->force_alloc = 1; + data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); @@ -3160,7 +3179,8 @@ alloc: ret = do_chunk_alloc(trans, root->fs_info->extent_root, bytes + 2 * 1024 * 1024, - alloc_target, 0); + alloc_target, + CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans, root); if (ret < 0) { if (ret != -ENOSPC) @@ -3239,31 +3259,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_METADATA) - found->force_alloc = 1; + found->force_alloc = CHUNK_ALLOC_FORCE; } rcu_read_unlock(); } static int should_alloc_chunk(struct btrfs_root *root, - struct btrfs_space_info *sinfo, u64 alloc_bytes) + struct btrfs_space_info *sinfo, u64 alloc_bytes, + int force) { u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; + u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; u64 thresh; - if (sinfo->bytes_used + sinfo->bytes_reserved + - alloc_bytes + 256 * 1024 * 1024 < num_bytes) + if (force == CHUNK_ALLOC_FORCE) + return 1; + + /* + * in limited mode, we want to have some free space up to + * about 1% of the FS size. + */ + if (force == CHUNK_ALLOC_LIMITED) { + thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = max_t(u64, 64 * 1024 * 1024, + div_factor_fine(thresh, 1)); + + if (num_bytes - num_allocated < thresh) + return 1; + } + + /* + * we have two similar checks here, one based on percentage + * and once based on a hard number of 256MB. The idea + * is that if we have a good amount of free + * room, don't allocate a chunk. A good mount is + * less than 80% utilized of the chunks we have allocated, + * or more than 256MB free + */ + if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) return 0; - if (sinfo->bytes_used + sinfo->bytes_reserved + - alloc_bytes < div_factor(num_bytes, 8)) + if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) return 0; thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + + /* 256MB or 5% of the FS */ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) return 0; - return 1; } @@ -3289,17 +3334,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, spin_lock(&space_info->lock); if (space_info->force_alloc) - force = 1; + force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); goto out; } - if (!force && !should_alloc_chunk(extent_root, space_info, - alloc_bytes)) { + if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { spin_unlock(&space_info->lock); goto out; } + spin_unlock(&space_info->lock); /* @@ -3327,7 +3372,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->full = 1; else ret = 1; - space_info->force_alloc = 0; + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; spin_unlock(&space_info->lock); out: mutex_unlock(&extent_root->fs_info->chunk_mutex); @@ -5303,11 +5348,13 @@ loop: if (allowed_chunk_alloc) { ret = do_chunk_alloc(trans, root, num_bytes + - 2 * 1024 * 1024, data, 1); + 2 * 1024 * 1024, data, + CHUNK_ALLOC_LIMITED); allowed_chunk_alloc = 0; done_chunk_alloc = 1; - } else if (!done_chunk_alloc) { - space_info->force_alloc = 1; + } else if (!done_chunk_alloc && + space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { + space_info->force_alloc = CHUNK_ALLOC_LIMITED; } if (loop < LOOP_NO_EMPTY_SIZE) { @@ -5393,7 +5440,8 @@ again: */ if (empty_size || root->ref_cows) ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data, 0); + num_bytes + 2 * 1024 * 1024, data, + CHUNK_ALLOC_NO_FORCE); WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -5405,7 +5453,7 @@ again: num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data, 1); + num_bytes, data, CHUNK_ALLOC_FORCE); goto again; } if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { @@ -8109,13 +8157,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, alloc_flags = update_block_group_flags(root, cache->flags); if (alloc_flags != cache->flags) - do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); ret = set_block_group_ro(cache); if (!ret) goto out; alloc_flags = get_alloc_profile(root, cache->space_info->flags); - ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); if (ret < 0) goto out; ret = set_block_group_ro(cache); @@ -8128,7 +8178,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type) { u64 alloc_flags = get_alloc_profile(root, type); - return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); } /* From f2eda2c6cc138710ae775490397657e8877774b6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 23 Mar 2011 19:48:16 +0530 Subject: [PATCH 159/200] fs/9p: Fix revalidate to return correct value revalidate should return > 0 on success. Also return 0 on ENOENT to force do_revalidate to return NULL dentry; Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_dentry.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index b6a3b9f7fe4d..e022890c6f40 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) retval = v9fs_refresh_inode_dotl(fid, inode); else retval = v9fs_refresh_inode(fid, inode); - if (retval <= 0) + if (retval == -ENOENT) + return 0; + if (retval < 0) return retval; } out_valid: From c2ed388021a60bb4a9449fddfef770c95875b052 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 23 Mar 2011 15:11:27 +0530 Subject: [PATCH 160/200] fs/9p: Use write_inode for data sync on server Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_super.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index f3eed3383e4f..acdc26593e64 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -307,6 +307,51 @@ static int v9fs_drop_inode(struct inode *inode) return 1; } +static int v9fs_write_inode(struct inode *inode, + struct writeback_control *wbc) +{ + int ret; + struct p9_wstat wstat; + struct v9fs_inode *v9inode; + /* + * send an fsync request to server irrespective of + * wbc->sync_mode. + */ + P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); + v9inode = V9FS_I(inode); + if (!v9inode->writeback_fid) + return 0; + v9fs_blank_wstat(&wstat); + + ret = p9_client_wstat(v9inode->writeback_fid, &wstat); + if (ret < 0) { + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; + } + return 0; +} + +static int v9fs_write_inode_dotl(struct inode *inode, + struct writeback_control *wbc) +{ + int ret; + struct v9fs_inode *v9inode; + /* + * send an fsync request to server irrespective of + * wbc->sync_mode. + */ + P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); + v9inode = V9FS_I(inode); + if (!v9inode->writeback_fid) + return 0; + ret = p9_client_fsync(v9inode->writeback_fid, 0); + if (ret < 0) { + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; + } + return 0; +} + static const struct super_operations v9fs_super_ops = { .alloc_inode = v9fs_alloc_inode, .destroy_inode = v9fs_destroy_inode, @@ -314,6 +359,7 @@ static const struct super_operations v9fs_super_ops = { .evict_inode = v9fs_evict_inode, .show_options = generic_show_options, .umount_begin = v9fs_umount_begin, + .write_inode = v9fs_write_inode, }; static const struct super_operations v9fs_super_ops_dotl = { @@ -325,6 +371,7 @@ static const struct super_operations v9fs_super_ops_dotl = { .evict_inode = v9fs_evict_inode, .show_options = generic_show_options, .umount_begin = v9fs_umount_begin, + .write_inode = v9fs_write_inode_dotl, }; struct file_system_type v9fs_fs_type = { From df5d8c80f1871d9e79af4b0f3656a9528a7d4bab Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Mar 2011 20:38:35 +0530 Subject: [PATCH 161/200] 9p: revert tsyncfs related changes Now that we use write_inode to flush server cache related to fid, we don't need tsyncfs either fort dotl or dotu protocols. For dotu this helps to do a more efficient server flush. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/fid.c | 15 ++------------- fs/9p/v9fs.h | 1 - fs/9p/vfs_super.c | 33 +++++++++------------------------ include/net/9p/9p.h | 2 -- include/net/9p/client.h | 1 - net/9p/client.c | 21 --------------------- 6 files changed, 11 insertions(+), 62 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 0ee594569dcc..85b67ffa2a43 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) { - int err, flags; + int err; struct p9_fid *fid; - struct v9fs_session_info *v9ses; - v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_clone_with_uid(dentry, 0); if (IS_ERR(fid)) goto error_out; @@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) * dirty pages. We always request for the open fid in read-write * mode so that a partial page write which result in page * read can work. - * - * we don't have a tsyncfs operation for older version - * of protocol. So make sure the write back fid is - * opened in O_SYNC mode. */ - if (!v9fs_proto_dotl(v9ses)) - flags = O_RDWR | O_SYNC; - else - flags = O_RDWR; - - err = p9_client_open(fid, flags); + err = p9_client_open(fid, O_RDWR); if (err < 0) { p9_client_clunk(fid); fid = ERR_PTR(err); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 9665c2b840e6..e5ebedfc5ed8 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -116,7 +116,6 @@ struct v9fs_session_info { struct list_head slist; /* list of sessions registered with v9fs */ struct backing_dev_info bdi; struct rw_semaphore rename_sem; - struct p9_fid *root_fid; /* Used for file system sync */ }; /* cache_validity flags */ diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index acdc26593e64..feef6cdc1fd2 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, retval = PTR_ERR(inode); goto release_sb; } + root = d_alloc_root(inode); if (!root) { iput(inode); @@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, p9stat_free(st); kfree(st); } - v9fs_fid_add(root, fid); retval = v9fs_get_acl(inode, fid); if (retval) goto release_sb; - /* - * Add the root fid to session info. This is used - * for file system sync. We want a cloned fid here - * so that we can do a sync_filesystem after a - * shrink_dcache_for_umount - */ - v9ses->root_fid = v9fs_fid_clone(root); - if (IS_ERR(v9ses->root_fid)) { - retval = PTR_ERR(v9ses->root_fid); - goto release_sb; - } + v9fs_fid_add(root, fid); P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); return dget(sb->s_root); @@ -210,11 +200,15 @@ close_session: v9fs_session_close(v9ses); kfree(v9ses); return ERR_PTR(retval); + release_sb: /* - * we will do the session_close and root dentry - * release in the below call. + * we will do the session_close and root dentry release + * in the below call. But we need to clunk fid, because we haven't + * attached the fid to dentry so it won't get clunked + * automatically. */ + p9_client_clunk(fid); deactivate_locked_super(sb); return ERR_PTR(retval); } @@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s) P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); kill_anon_super(s); - p9_client_clunk(v9ses->root_fid); + v9fs_session_cancel(v9ses); v9fs_session_close(v9ses); kfree(v9ses); @@ -285,14 +279,6 @@ done: return res; } -static int v9fs_sync_fs(struct super_block *sb, int wait) -{ - struct v9fs_session_info *v9ses = sb->s_fs_info; - - P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb); - return p9_client_sync_fs(v9ses->root_fid); -} - static int v9fs_drop_inode(struct inode *inode) { struct v9fs_session_info *v9ses; @@ -365,7 +351,6 @@ static const struct super_operations v9fs_super_ops = { static const struct super_operations v9fs_super_ops_dotl = { .alloc_inode = v9fs_alloc_inode, .destroy_inode = v9fs_destroy_inode, - .sync_fs = v9fs_sync_fs, .statfs = v9fs_statfs, .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index cdf2e8ac4309..d2df55b0c213 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -139,8 +139,6 @@ do { \ */ enum p9_msg_t { - P9_TSYNCFS = 0, - P9_RSYNCFS, P9_TLERROR = 6, P9_RLERROR, P9_TSTATFS = 8, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 85c1413f054d..59b5df599210 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -230,7 +230,6 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); -int p9_client_sync_fs(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); diff --git a/net/9p/client.c b/net/9p/client.c index 48b8e084e710..d72aac7d25cc 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1220,27 +1220,6 @@ error: } EXPORT_SYMBOL(p9_client_fsync); -int p9_client_sync_fs(struct p9_fid *fid) -{ - int err = 0; - struct p9_req_t *req; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); - - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto error; - } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); - p9_free_req(clnt, req); -error: - return err; -} -EXPORT_SYMBOL(p9_client_sync_fs); - int p9_client_clunk(struct p9_fid *fid) { int err; From 936bb2d7034165fd6ef7afea54057da65c329a27 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Mar 2011 23:04:41 +0530 Subject: [PATCH 162/200] fs/9p: Fix error reported by coccicheck Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode_dotl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index ffbb113d5f33..82a7c38ddad0 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -811,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) { __putname(link); - link = ERR_PTR(PTR_ERR(fid)); + link = ERR_CAST(fid); goto ndset; } retval = p9_client_readlink(fid, &target); From bd8c8ade6b6f109bc3dce14a8d12013f27f2a590 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Mar 2011 23:14:46 +0530 Subject: [PATCH 163/200] 9p: Fix sparse error Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/protocol.c | 3 ++- net/9p/trans_common.c | 2 +- net/9p/trans_virtio.c | 15 +++++++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 8a4084fa8b5a..a7e899740041 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -468,7 +468,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, case 'E':{ int32_t cnt = va_arg(ap, int32_t); const char *k = va_arg(ap, const void *); - const char *u = va_arg(ap, const void *); + const char __user *u = va_arg(ap, + const void __user *); errcode = p9pdu_writef(pdu, proto_version, "d", cnt); if (!errcode && pdu_write_urw(pdu, k, u, cnt)) diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index d47880e971dd..e883172f9aa2 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -66,7 +66,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, uint32_t pdata_mapped_pages; struct trans_rpage_info *rpinfo; - *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); + *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); if (*pdata_off) first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e8f046b07182..244e70742183 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -326,8 +326,11 @@ req_retry_pinned: outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, pdata_off, rpinfo->rp_data, pdata_len); } else { - char *pbuf = req->tc->pubuf ? req->tc->pubuf : - req->tc->pkbuf; + char *pbuf; + if (req->tc->pubuf) + pbuf = (__force char *) req->tc->pubuf; + else + pbuf = req->tc->pkbuf; outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, req->tc->pbuf_size); } @@ -352,8 +355,12 @@ req_retry_pinned: in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, pdata_off, rpinfo->rp_data, pdata_len); } else { - char *pbuf = req->tc->pubuf ? req->tc->pubuf : - req->tc->pkbuf; + char *pbuf; + if (req->tc->pubuf) + pbuf = (__force char *) req->tc->pubuf; + else + pbuf = req->tc->pkbuf; + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, pbuf, req->tc->pbuf_size); } From b76225e22ac98070325ee2ba89473c1e1360c4cb Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Thu, 31 Mar 2011 15:49:39 +0530 Subject: [PATCH 164/200] net/9p: nwname should be an unsigned int Signed-off-by: Harsh Prateek Bora Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric VAn Hensbergen --- include/net/9p/client.h | 4 ++-- net/9p/client.c | 8 ++++---- net/9p/protocol.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 59b5df599210..051a99f79769 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -218,8 +218,8 @@ void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone); +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension); diff --git a/net/9p/client.c b/net/9p/client.c index d72aac7d25cc..77367745be9b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -929,15 +929,15 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone) +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone) { int err; struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; - int16_t nwqids, count; + uint16_t nwqids, count; err = 0; wqids = NULL; @@ -955,7 +955,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, fid = oldfid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", + P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, diff --git a/net/9p/protocol.c b/net/9p/protocol.c index a7e899740041..b58a501cf3d1 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -265,7 +265,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t *nwname = va_arg(ap, int16_t *); + uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); errcode = p9pdu_readf(pdu, proto_version, @@ -496,7 +496,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t nwname = va_arg(ap, int); + uint16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); errcode = p9pdu_writef(pdu, proto_version, "w", From c1530019e311c91d14b24d8e74d233152d806e45 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 15 Apr 2011 11:39:29 -0700 Subject: [PATCH 165/200] vfs: Fix absolute RCU path walk failures due to uninitialized seq number During RCU walk in path_lookupat and path_openat, the rcu lookup frequently failed if looking up an absolute path, because when root directory was looked up, seq number was not properly set in nameidata. We dropped out of RCU walk in nameidata_drop_rcu due to mismatch in directory entry's seq number. We reverted to slow path walk that need to take references. With the following patch, I saw a 50% increase in an exim mail server benchmark throughput on a 4-socket Nehalem-EX system. Signed-off-by: Tim Chen Reviewed-by: Andi Kleen Cc: stable@kernel.org (v2.6.38) Signed-off-by: Linus Torvalds --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/namei.c b/fs/namei.c index e6cd6113872c..54fc993e3027 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -697,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd) do { seq = read_seqcount_begin(&fs->seq); nd->root = fs->root; + nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } } From 5bbc097d890409d8eff4e3f1d26f11a9d6b7c07e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 Apr 2011 14:47:40 +0200 Subject: [PATCH 166/200] x86, amd: Disable GartTlbWlkErr when BIOS forgets it This patch disables GartTlbWlk errors on AMD Fam10h CPUs if the BIOS forgets to do is (or is just too old). Letting these errors enabled can cause a sync-flood on the CPU causing a reboot. The AMD BKDG recommends disabling GART TLB Wlk Error completely. This patch is the fix for https://bugzilla.kernel.org/show_bug.cgi?id=33012 on my machine. Signed-off-by: Joerg Roedel Link: http://lkml.kernel.org/r/20110415131152.GJ18463@8bytes.org Tested-by: Alexandre Demers Cc: Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/kernel/cpu/amd.c | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fd5a1f365c95..3cce71413d0b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -96,11 +96,15 @@ #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 +#define MSR_AMD64_MC0_MASK 0xc0010044 + #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) #define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + /* These are consecutive and not in the normal 4er MCE bank block */ #define MSR_IA32_MC0_CTL2 0x00000280 #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3ecece0217ef..3532d3bf8105 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -615,6 +615,25 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* As a rule processors have APIC timer running in deep C states */ if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) set_cpu_cap(c, X86_FEATURE_ARAT); + + /* + * Disable GART TLB Walk Errors on Fam10h. We do this here + * because this is always needed when GART is enabled, even in a + * kernel which has no MCE support built in. + */ + if (c->x86 == 0x10) { + /* + * BIOS should disable GartTlbWlk Errors themself. If + * it doesn't do it here as suggested by the BKDG. + * + * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 + */ + u64 mask; + + rdmsrl(MSR_AMD64_MCx_MASK(4), mask); + mask |= (1 << 10); + wrmsrl(MSR_AMD64_MCx_MASK(4), mask); + } } #ifdef CONFIG_X86_32 From 0d399205edf3a4c290e76ebb36e541593af4a1b4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 16 Apr 2011 06:55:39 -0400 Subject: [PATCH 167/200] Btrfs end_bio_extent_readpage should look for locked bits A recent commit caches the extent state in end_bio_extent_readpage, but the search it does should look for locked extents. This fixes things to make it more effective. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1c462f895c98..5ae0bffaa4d8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1761,7 +1761,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, 0); + state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); if (state && state->start == start) { /* * take a reference on the state, unlock will drop From 6d74119f1a3efad9dc7f79a16c201242324b731f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 20:20:11 -0400 Subject: [PATCH 168/200] Btrfs: avoid taking the chunk_mutex in do_chunk_alloc Everytime we try to allocate disk space we try and see if we can pre-emptively allocate a chunk, but in the common case we don't allocate anything, so there is no sense in taking the chunk_mutex at all. So instead if we are allocating a chunk, mark it in the space_info so we don't get two people trying to allocate at the same time. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Liu Bo --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/extent-tree.c | 30 +++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0d00a07b5b29..2e61fe1b6b8c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -740,8 +740,10 @@ struct btrfs_space_info { */ unsigned long reservation_progress; - int full; /* indicates that we cannot allocate any more + int full:1; /* indicates that we cannot allocate any more chunks for this space */ + int chunk_alloc:1; /* set if we are allocating a chunk */ + int force_alloc; /* set if we need to force a chunk alloc for this space */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 26479484180d..31f33ba56fe8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3039,6 +3039,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_may_use = 0; found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; + found->chunk_alloc = 0; *space_info = found; list_add_rcu(&found->list, &info->space_info); atomic_set(&found->caching_threads, 0); @@ -3318,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, { struct btrfs_space_info *space_info; struct btrfs_fs_info *fs_info = extent_root->fs_info; + int wait_for_alloc = 0; int ret = 0; - mutex_lock(&fs_info->chunk_mutex); - flags = btrfs_reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); @@ -3332,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); +again: spin_lock(&space_info->lock); if (space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); - goto out; + return 0; } if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { spin_unlock(&space_info->lock); - goto out; + return 0; + } else if (space_info->chunk_alloc) { + wait_for_alloc = 1; + } else { + space_info->chunk_alloc = 1; } spin_unlock(&space_info->lock); + mutex_lock(&fs_info->chunk_mutex); + + /* + * The chunk_mutex is held throughout the entirety of a chunk + * allocation, so once we've acquired the chunk_mutex we know that the + * other guy is done and we need to recheck and see if we should + * allocate. + */ + if (wait_for_alloc) { + mutex_unlock(&fs_info->chunk_mutex); + wait_for_alloc = 0; + goto again; + } + /* * If we have mixed data/metadata chunks we want to make sure we keep * allocating mixed chunks instead of individual chunks. @@ -3372,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->full = 1; else ret = 1; + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; + space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); -out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } From a237c1c5bc5dc5c76a21be922dca4826f3eca8ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Apr 2011 13:27:55 +0200 Subject: [PATCH 169/200] block: let io_schedule() flush the plug inline Linus correctly observes that the most important dispatch cases are now done from kblockd, this isn't ideal for latency reasons. The original reason for switching dispatches out-of-line was to avoid too deep a stack, so by _only_ letting the "accidental" flush directly in schedule() be guarded by offload to kblockd, we should be able to get the best of both worlds. So add a blk_schedule_flush_plug() that offloads to kblockd, and only use that from the schedule() path. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++++++++ kernel/sched.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1c76506fcf11..ec0357d8c4a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,14 @@ static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; + if (plug) + blk_flush_plug_list(plug, false); +} + +static inline void blk_schedule_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + if (plug) blk_flush_plug_list(plug, true); } @@ -1317,6 +1325,11 @@ static inline void blk_flush_plug(struct task_struct *task) { } +static inline void blk_schedule_flush_plug(struct task_struct *task) +{ +} + + static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; diff --git a/kernel/sched.c b/kernel/sched.c index a187c3fe027b..312f8b95c2d4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4118,7 +4118,7 @@ need_resched: */ if (blk_needs_flush_plug(prev)) { raw_spin_unlock(&rq->lock); - blk_flush_plug(prev); + blk_schedule_flush_plug(prev); raw_spin_lock(&rq->lock); } } From 49cac01e1fa74174d72adb0e872504a7fefd7c01 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Apr 2011 13:51:05 +0200 Subject: [PATCH 170/200] block: make unplug timer trace event correspond to the schedule() unplug It's a pretty close match to what we had before - the timer triggering would mean that nobody unplugged the plug in due time, in the new scheme this matches very closely what the schedule() unplug now is. It's essentially the difference between an explicit unplug (IO unplug) or an implicit unplug (timer unplug, we scheduled with pending IO queued). Signed-off-by: Jens Axboe --- block/blk-core.c | 18 ++++++++++++------ include/trace/events/block.h | 13 +++++++------ kernel/trace/blktrace.c | 18 ++++++++++++------ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 3c8121072507..78b7b0cb7216 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2662,17 +2662,23 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) return !(rqa->q <= rqb->q); } +/* + * If 'from_schedule' is true, then postpone the dispatch of requests + * until a safe kblockd context. We due this to avoid accidental big + * additional stack usage in driver dispatch, in places where the originally + * plugger did not intend it. + */ static void queue_unplugged(struct request_queue *q, unsigned int depth, - bool force_kblockd) + bool from_schedule) { - trace_block_unplug_io(q, depth); - __blk_run_queue(q, force_kblockd); + trace_block_unplug(q, depth, !from_schedule); + __blk_run_queue(q, from_schedule); if (q->unplugged_fn) q->unplugged_fn(q); } -void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) +void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; unsigned long flags; @@ -2707,7 +2713,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) BUG_ON(!rq->q); if (rq->q != q) { if (q) { - queue_unplugged(q, depth, force_kblockd); + queue_unplugged(q, depth, from_schedule); spin_unlock(q->queue_lock); } q = rq->q; @@ -2728,7 +2734,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd) } if (q) { - queue_unplugged(q, depth, force_kblockd); + queue_unplugged(q, depth, from_schedule); spin_unlock(q->queue_lock); } diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 006e60b58306..bf366547da25 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -401,9 +401,9 @@ TRACE_EVENT(block_plug, DECLARE_EVENT_CLASS(block_unplug, - TP_PROTO(struct request_queue *q, unsigned int depth), + TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), - TP_ARGS(q, depth), + TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) @@ -419,18 +419,19 @@ DECLARE_EVENT_CLASS(block_unplug, ); /** - * block_unplug_io - release of operations requests in request queue + * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue + * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ -DEFINE_EVENT(block_unplug, block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug, - TP_PROTO(struct request_queue *q, unsigned int depth), + TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), - TP_ARGS(q, depth) + TP_ARGS(q, depth, explicit) ); /** diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3e3970d53d14..6957aa298dfa 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -850,16 +850,21 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); } -static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q, - unsigned int depth) +static void blk_add_trace_unplug(void *ignore, struct request_queue *q, + unsigned int depth, bool explicit) { struct blk_trace *bt = q->blk_trace; if (bt) { __be64 rpdu = cpu_to_be64(depth); + u32 what; - __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, - sizeof(rpdu), &rpdu); + if (explicit) + what = BLK_TA_UNPLUG_IO; + else + what = BLK_TA_UNPLUG_TIMER; + + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); } } @@ -1002,7 +1007,7 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); - ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); + ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); WARN_ON(ret); @@ -1017,7 +1022,7 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); - unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); + unregister_trace_block_unplug(blk_add_trace_unplug, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); @@ -1332,6 +1337,7 @@ static const struct { [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, + [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, From a920ff41cb3d2b03da095c4fa1a11b71417ae2a4 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 17 Apr 2011 10:20:19 +0200 Subject: [PATCH 171/200] i2c: Improve deprecation warnings When warning on the use of deprecated i2c_driver methods attach_adapter and detach_adapter, mention the name of the driver which needs to be updated. Signed-off-by: Jean Delvare Cc: Benjamin Herrenschmidt --- drivers/i2c/i2c-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 70c30e6bce0b..9a58994ff7ea 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -797,7 +797,8 @@ static int i2c_do_add_adapter(struct i2c_driver *driver, /* Let legacy drivers scan this bus for matching devices */ if (driver->attach_adapter) { - dev_warn(&adap->dev, "attach_adapter method is deprecated\n"); + dev_warn(&adap->dev, "%s: attach_adapter method is deprecated\n", + driver->driver.name); dev_warn(&adap->dev, "Please use another way to instantiate " "your i2c_client\n"); /* We ignore the return code; if it fails, too bad */ @@ -984,7 +985,8 @@ static int i2c_do_del_adapter(struct i2c_driver *driver, if (!driver->detach_adapter) return 0; - dev_warn(&adapter->dev, "detach_adapter method is deprecated\n"); + dev_warn(&adapter->dev, "%s: detach_adapter method is deprecated\n", + driver->driver.name); res = driver->detach_adapter(adapter); if (res) dev_err(&adapter->dev, "detach_adapter failed (%d) " From d3b3e15da14ded61c9654db05863b04a2435f4cc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 17 Apr 2011 10:20:19 +0200 Subject: [PATCH 172/200] i2c-algo-bit: Call pre/post_xfer for bit_test Apparently some distros set i2c-algo-bit.bit_test to 1 by default. In some cases this causes i2c_bit_add_bus to fail and prevents the i2c bus from being added. In the radeon case, we fail to add the ddc i2c buses which prevents the driver from being able to detect attached monitors. The i2c bus works fine even if bit_test fails. This is likely due to gpio switching that is required and handled in the pre/post_xfer hooks, so call the pre/post_xfer hooks in the bit test as well. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=36221 Signed-off-by: Alex Deucher Signed-off-by: Jean Delvare Cc: stable@kernel.org [.38 down to .34] --- drivers/i2c/algos/i2c-algo-bit.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index 38319a69bd0a..d6d58684712b 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -232,9 +232,17 @@ static int i2c_inb(struct i2c_adapter *i2c_adap) * Sanity check for the adapter hardware - check the reaction of * the bus lines only if it seems to be idle. */ -static int test_bus(struct i2c_algo_bit_data *adap, char *name) +static int test_bus(struct i2c_adapter *i2c_adap) { - int scl, sda; + struct i2c_algo_bit_data *adap = i2c_adap->algo_data; + const char *name = i2c_adap->name; + int scl, sda, ret; + + if (adap->pre_xfer) { + ret = adap->pre_xfer(i2c_adap); + if (ret < 0) + return -ENODEV; + } if (adap->getscl == NULL) pr_info("%s: Testing SDA only, SCL is not readable\n", name); @@ -297,11 +305,19 @@ static int test_bus(struct i2c_algo_bit_data *adap, char *name) "while pulling SCL high!\n", name); goto bailout; } + + if (adap->post_xfer) + adap->post_xfer(i2c_adap); + pr_info("%s: Test OK\n", name); return 0; bailout: sdahi(adap); sclhi(adap); + + if (adap->post_xfer) + adap->post_xfer(i2c_adap); + return -ENODEV; } @@ -607,7 +623,7 @@ static int __i2c_bit_add_bus(struct i2c_adapter *adap, int ret; if (bit_test) { - ret = test_bus(bit_adap, adap->name); + ret = test_bus(adap); if (ret < 0) return -ENODEV; } From fff3e5ade4455a4b42a19c95dd7a167a3cb7956a Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 14 Apr 2011 10:30:08 -0500 Subject: [PATCH 173/200] fs: synchronize_rcu when unregister_filesystem success not failure While checking unregister_filesystem for saftey vs extra calls for "ext4: register ext2 and ext3 alias after ext4" I realized that the synchronize_rcu() was called on the error path but not on the success path. Cc: stable (2.6.38) Signed-off-by: Milton Miller [ This probably won't really make a difference since commit d863b50ab013 ("vfs: call rcu_barrier after ->kill_sb()"), but it's the right thing to do. - Linus ] Signed-off-by: Linus Torvalds --- fs/filesystems.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 751d6b255a12..0845f84f2a5f 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs) *tmp = fs->next; fs->next = NULL; write_unlock(&file_systems_lock); + synchronize_rcu(); return 0; } tmp = &(*tmp)->next; } write_unlock(&file_systems_lock); - synchronize_rcu(); - return -EINVAL; } From 90fd30c914ec71eb8dc91259bf720b3641672696 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 17 Apr 2011 13:05:23 -0700 Subject: [PATCH 174/200] alpha: Don't force -Werror. There are outstanding gcc 4.6 warnings that need to be cleaned up in the subdirectory. No sense forcing the issue immediately. Signed-off-by: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 9bb7b858ed23..7a6d908bb865 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -4,7 +4,7 @@ extra-y := head.o vmlinux.lds asflags-y := $(KBUILD_CFLAGS) -ccflags-y := -Werror -Wno-sign-compare +ccflags-y := -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ From 280da4e4d306667b7faa95152b54f7ca4266ff1e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 17 Apr 2011 13:05:24 -0700 Subject: [PATCH 175/200] alpha: Remove set but unused variables. This is a new warning in gcc 4.6. Several of these variables are used within #if 0 code, which probably ought to be removed. Most of the changes are legitimate cleanups. Signed-off-by: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_mcpcia.c | 12 +++++------- arch/alpha/kernel/err_titan.c | 4 +--- arch/alpha/kernel/setup.c | 6 +++--- arch/alpha/kernel/smc37c93x.c | 3 +-- arch/alpha/kernel/sys_wildfire.c | 5 +++-- 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c index 381fec0af52e..da7bcc372f16 100644 --- a/arch/alpha/kernel/core_mcpcia.c +++ b/arch/alpha/kernel/core_mcpcia.c @@ -88,7 +88,7 @@ conf_read(unsigned long addr, unsigned char type1, { unsigned long flags; unsigned long mid = MCPCIA_HOSE2MID(hose->index); - unsigned int stat0, value, temp, cpu; + unsigned int stat0, value, cpu; cpu = smp_processor_id(); @@ -101,7 +101,7 @@ conf_read(unsigned long addr, unsigned char type1, stat0 = *(vuip)MCPCIA_CAP_ERR(mid); *(vuip)MCPCIA_CAP_ERR(mid) = stat0; mb(); - temp = *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid); DBG_CFG(("conf_read: MCPCIA_CAP_ERR(%d) was 0x%x\n", mid, stat0)); mb(); @@ -136,7 +136,7 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1, { unsigned long flags; unsigned long mid = MCPCIA_HOSE2MID(hose->index); - unsigned int stat0, temp, cpu; + unsigned int stat0, cpu; cpu = smp_processor_id(); @@ -145,7 +145,7 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1, /* Reset status register to avoid losing errors. */ stat0 = *(vuip)MCPCIA_CAP_ERR(mid); *(vuip)MCPCIA_CAP_ERR(mid) = stat0; mb(); - temp = *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid); DBG_CFG(("conf_write: MCPCIA CAP_ERR(%d) was 0x%x\n", mid, stat0)); draina(); @@ -157,7 +157,7 @@ conf_write(unsigned long addr, unsigned int value, unsigned char type1, *((vuip)addr) = value; mb(); mb(); /* magic */ - temp = *(vuip)MCPCIA_CAP_ERR(mid); /* read to force the write */ + *(vuip)MCPCIA_CAP_ERR(mid); /* read to force the write */ mcheck_expected(cpu) = 0; mb(); @@ -572,12 +572,10 @@ mcpcia_print_system_area(unsigned long la_ptr) void mcpcia_machine_check(unsigned long vector, unsigned long la_ptr) { - struct el_common *mchk_header; struct el_MCPCIA_uncorrected_frame_mcheck *mchk_logout; unsigned int cpu = smp_processor_id(); int expected; - mchk_header = (struct el_common *)la_ptr; mchk_logout = (struct el_MCPCIA_uncorrected_frame_mcheck *)la_ptr; expected = mcheck_expected(cpu); diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c index c3b3781a03de..14b26c466c89 100644 --- a/arch/alpha/kernel/err_titan.c +++ b/arch/alpha/kernel/err_titan.c @@ -533,8 +533,6 @@ static struct el_subpacket_annotation el_titan_annotations[] = { static struct el_subpacket * el_process_regatta_subpacket(struct el_subpacket *header) { - int status; - if (header->class != EL_CLASS__REGATTA_FAMILY) { printk("%s ** Unexpected header CLASS %d TYPE %d, aborting\n", err_print_prefix, @@ -551,7 +549,7 @@ el_process_regatta_subpacket(struct el_subpacket *header) printk("%s ** Occurred on CPU %d:\n", err_print_prefix, (int)header->by_type.regatta_frame.cpuid); - status = privateer_process_logout_frame((struct el_common *) + privateer_process_logout_frame((struct el_common *) header->by_type.regatta_frame.data_start, 1); break; default: diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index d2634e4476b4..edbddcbd5bc6 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -1404,8 +1404,6 @@ determine_cpu_caches (unsigned int cpu_type) case PCA56_CPU: case PCA57_CPU: { - unsigned long cbox_config, size; - if (cpu_type == PCA56_CPU) { L1I = CSHAPE(16*1024, 6, 1); L1D = CSHAPE(8*1024, 5, 1); @@ -1415,10 +1413,12 @@ determine_cpu_caches (unsigned int cpu_type) } L3 = -1; +#if 0 + unsigned long cbox_config, size; + cbox_config = *(vulp) phys_to_virt (0xfffff00008UL); size = 512*1024 * (1 << ((cbox_config >> 12) & 3)); -#if 0 L2 = ((cbox_config >> 31) & 1 ? CSHAPE (size, 6, 1) : -1); #else L2 = external_cache_probe(512*1024, 6); diff --git a/arch/alpha/kernel/smc37c93x.c b/arch/alpha/kernel/smc37c93x.c index 3e6a2893af9f..6886b834f487 100644 --- a/arch/alpha/kernel/smc37c93x.c +++ b/arch/alpha/kernel/smc37c93x.c @@ -79,7 +79,6 @@ static unsigned long __init SMCConfigState(unsigned long baseAddr) { unsigned char devId; - unsigned char devRev; unsigned long configPort; unsigned long indexPort; @@ -100,7 +99,7 @@ static unsigned long __init SMCConfigState(unsigned long baseAddr) devId = inb(dataPort); if (devId == VALID_DEVICE_ID) { outb(DEVICE_REV, indexPort); - devRev = inb(dataPort); + /* unsigned char devRev = */ inb(dataPort); break; } else diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index d3cb28bb8eb0..d92cdc715c65 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -156,7 +156,6 @@ static void __init wildfire_init_irq_per_pca(int qbbno, int pcano) { int i, irq_bias; - unsigned long io_bias; static struct irqaction isa_enable = { .handler = no_action, .name = "isa_enable", @@ -165,10 +164,12 @@ wildfire_init_irq_per_pca(int qbbno, int pcano) irq_bias = qbbno * (WILDFIRE_PCA_PER_QBB * WILDFIRE_IRQ_PER_PCA) + pcano * WILDFIRE_IRQ_PER_PCA; +#if 0 + unsigned long io_bias; + /* Only need the following for first PCI bus per PCA. */ io_bias = WILDFIRE_IO(qbbno, pcano<<1) - WILDFIRE_IO_BIAS; -#if 0 outb(0, DMA1_RESET_REG + io_bias); outb(0, DMA2_RESET_REG + io_bias); outb(DMA_MODE_CASCADE, DMA2_MODE_REG + io_bias); From 6181318897258749a637829c542b8448fdce346d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 17 Apr 2011 13:05:25 -0700 Subject: [PATCH 176/200] alpha: Fix RTC interrupt setup. Following commit 091738a266fc ("genirq: Remove real old transition functions") we removed an automatic conversion of no_irq_chip to dummy_irq_chip. This change needs to be propagated back into the alpha backend. Signed-off-by: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/irq_alpha.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 1479dc6ebd97..51b7fbd9e4c1 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -228,7 +228,7 @@ struct irqaction timer_irqaction = { void __init init_rtc_irq(void) { - irq_set_chip_and_handler_name(RTC_IRQ, &no_irq_chip, + irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip, handle_simple_irq, "RTC"); setup_irq(RTC_IRQ, &timer_irqaction); } From a78eda5cd3055852231ad10cfd047e65cb44dfde Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 17 Apr 2011 13:05:26 -0700 Subject: [PATCH 177/200] alpha: Fix uninitialized value in read_persistent_clock. Signed-off-by: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index a58e84f1a63b..918e8e0b72ff 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -153,6 +153,7 @@ void read_persistent_clock(struct timespec *ts) year += 100; ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } From 7c7a81b53e581d727d069cc45df5510516faac31 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 13 Apr 2011 06:30:08 +0000 Subject: [PATCH 178/200] powerpc/kexec: Fix regression causing compile failure on UP Recent commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce caused a compile failure on UP because a considerably large block of the file was included within CONFIG_SMP, hence making a stub function not exposed on UP builds when it needed to be. Relocate the stub to the #else /* ! CONFIG_SMP */ section and also annotate the relevant else/endif so that nobody else falls into the same trap I did. Reported-by: Michael Guntsche Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d3d416339dd..5b5e1f002a8e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -163,7 +163,7 @@ static void crash_kexec_prepare_cpus(int cpu) } /* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) +#ifdef CONFIG_PPC_STD_MMU_64 static void crash_kexec_wait_realmode(int cpu) { unsigned int msecs; @@ -188,9 +188,7 @@ static void crash_kexec_wait_realmode(int cpu) } mb(); } -#else -static inline void crash_kexec_wait_realmode(int cpu) {} -#endif +#endif /* CONFIG_PPC_STD_MMU_64 */ /* * This function will be called by secondary cpus or by kexec cpu @@ -235,7 +233,9 @@ void crash_kexec_secondary(struct pt_regs *regs) crash_ipi_callback(regs); } -#else +#else /* ! CONFIG_SMP */ +static inline void crash_kexec_wait_realmode(int cpu) {} + static void crash_kexec_prepare_cpus(int cpu) { /* @@ -255,7 +255,7 @@ void crash_kexec_secondary(struct pt_regs *regs) { cpus_in_sr = CPU_MASK_NONE; } -#endif +#endif /* CONFIG_SMP */ /* * Register a function to be called on shutdown. Only use this if you From 127493d5dc73589cbe00ea5ec8357cc2a4c0d82a Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 13 Apr 2011 19:45:59 +0000 Subject: [PATCH 179/200] powerpc/pseries: Use a kmem cache for DTL buffers PAPR specifies that DTL buffers can not cross AMS environments (aka CMO in the PAPR) and can not cross a memory entitlement granule boundary (4k). This is found in section 14.11.3.2 H_REGISTER_VPA of the PAPR. kmalloc does not guarantee an alignment of the allocation, though, beyond 8 bytes (at least in my understanding). Create a special kmem cache for DTL buffers with the alignment requirement. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/setup.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 000724149089..6c42cfde8415 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -287,14 +287,22 @@ static int alloc_dispatch_logs(void) int cpu, ret; struct paca_struct *pp; struct dtl_entry *dtl; + struct kmem_cache *dtl_cache; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; + dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, NULL); + if (!dtl_cache) { + pr_warn("Failed to create dispatch trace log buffer cache\n"); + pr_warn("Stolen time statistics will be unreliable\n"); + return 0; + } + for_each_possible_cpu(cpu) { pp = &paca[cpu]; - dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, - cpu_to_node(cpu)); + dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); if (!dtl) { pr_warn("Failed to allocate dispatch trace log for cpu %d\n", cpu); From 84ffae55af79d7b8834fd0c08d0d1ebf2c77f91e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Apr 2011 21:44:21 +0000 Subject: [PATCH 180/200] powerpc: Fix oops if scan_dispatch_log is called too early We currently enable interrupts before the dispatch log for the boot cpu is setup. If a timer interrupt comes in early enough we oops in scan_dispatch_log: Unable to handle kernel paging request for data at address 0x00000010 ... .scan_dispatch_log+0xb0/0x170 .account_system_vtime+0xa0/0x220 .irq_enter+0x88/0xc0 .do_IRQ+0x48/0x230 The patch below adds a check to scan_dispatch_log to ensure the dispatch log has been allocated. Signed-off-by: Anton Blanchard Cc: Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 375480c56eb9..f33acfd872ad 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -229,6 +229,9 @@ static u64 scan_dispatch_log(u64 stop_tb) u64 stolen = 0; u64 dtb; + if (!dtl) + return 0; + if (i == vpa->dtl_idx) return 0; while (i < vpa->dtl_idx) { From 09597cfe93d3cc2c6e064a3ead5956b882511560 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 14 Apr 2011 23:49:53 +0000 Subject: [PATCH 181/200] powerpc: Don't write protect kernel text with CONFIG_DYNAMIC_FTRACE enabled This problem was noticed on an MPC855T platform. Ftrace did oops when trying to write to the kernel text segment. Many thanks to Joakim for finding the root cause of this problem. Signed-off-by: Stefan Roese Cc: Joakim Tjernlund Cc: Benjamin Herrenschmidt Cc: Steven Rostedt Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pte-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 811f04ac3660..8d1569c29042 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -162,7 +162,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); * on platforms where such control is possible. */ #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ - defined(CONFIG_KPROBES) + defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) #define PAGE_KERNEL_TEXT PAGE_KERNEL_X #else #define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX From 86c74ab317c1ef4d37325e0d7ca8a01a796b0bd7 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 15 Apr 2011 08:12:30 +0000 Subject: [PATCH 182/200] powerpc/perf_event: Skip updating kernel counters if register value shrinks Because of speculative event roll back, it is possible for some event coutners to decrease between reads on POWER7. This causes a problem with the way that counters are updated. Delta calues are calculated in a 64 bit value and the top 32 bits are masked. If the register value has decreased, this leaves us with a very large positive value added to the kernel counters. This patch protects against this by skipping the update if the delta would be negative. This can lead to a lack of precision in the coutner values, but from my testing the value is typcially fewer than 10 samples at a time. Signed-off-by: Eric B Munson Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/perf_event.c | 37 ++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c4063b7f49a0..822f63008ae1 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -398,6 +398,25 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], return 0; } +static u64 check_and_compute_delta(u64 prev, u64 val) +{ + u64 delta = (val - prev) & 0xfffffffful; + + /* + * POWER7 can roll back counter values, if the new value is smaller + * than the previous value it will cause the delta and the counter to + * have bogus values unless we rolled a counter over. If a coutner is + * rolled back, it will be smaller, but within 256, which is the maximum + * number of events to rollback at once. If we dectect a rollback + * return 0. This can lead to a small lack of precision in the + * counters. + */ + if (prev > val && (prev - val) < 256) + delta = 0; + + return delta; +} + static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; @@ -416,10 +435,11 @@ static void power_pmu_read(struct perf_event *event) prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); + delta = check_and_compute_delta(prev, val); + if (!delta) + return; } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; local64_add(delta, &event->count); local64_sub(delta, &event->hw.period_left); } @@ -449,8 +469,9 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw, val = (event->hw.idx == 5) ? pmc5 : pmc6; prev = local64_read(&event->hw.prev_count); event->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - local64_add(delta, &event->count); + delta = check_and_compute_delta(prev, val); + if (delta) + local64_add(delta, &event->count); } } @@ -458,14 +479,16 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw, unsigned long pmc5, unsigned long pmc6) { struct perf_event *event; - u64 val; + u64 val, prev; int i; for (i = 0; i < cpuhw->n_limited; ++i) { event = cpuhw->limited_counter[i]; event->hw.idx = cpuhw->limited_hwidx[i]; val = (event->hw.idx == 5) ? pmc5 : pmc6; - local64_set(&event->hw.prev_count, val); + prev = local64_read(&event->hw.prev_count); + if (check_and_compute_delta(prev, val)) + local64_set(&event->hw.prev_count, val); perf_event_update_userpage(event); } } @@ -1197,7 +1220,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); - delta = (val - prev) & 0xfffffffful; + delta = check_and_compute_delta(prev, val); local64_add(delta, &event->count); /* From 7b84b29b8c2711fe64e0dba4db22f02ce0f16015 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 18 Apr 2011 15:46:35 +1000 Subject: [PATCH 183/200] powerpc/powermac: Build fix with SMP and CPU hotplug Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powermac/smp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index a830c5e80657..bc5f0dc6ae1e 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -842,6 +842,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr) mpic_setup_this_cpu(); } +#ifdef CONFIG_PPC64 #ifdef CONFIG_HOTPLUG_CPU static int smp_core99_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -879,7 +880,6 @@ static struct notifier_block __cpuinitdata smp_core99_cpu_nb = { static void __init smp_core99_bringup_done(void) { -#ifdef CONFIG_PPC64 extern void g5_phy_disable_cpu1(void); /* Close i2c bus if it was used for tb sync */ @@ -894,14 +894,14 @@ static void __init smp_core99_bringup_done(void) set_cpu_present(1, false); g5_phy_disable_cpu1(); } -#endif /* CONFIG_PPC64 */ - #ifdef CONFIG_HOTPLUG_CPU register_cpu_notifier(&smp_core99_cpu_nb); #endif + if (ppc_md.progress) ppc_md.progress("smp_core99_bringup_done", 0x349); } +#endif /* CONFIG_PPC64 */ #ifdef CONFIG_HOTPLUG_CPU @@ -975,7 +975,9 @@ static void pmac_cpu_die(void) struct smp_ops_t core99_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_core99_probe, +#ifdef CONFIG_PPC64 .bringup_done = smp_core99_bringup_done, +#endif .kick_cpu = smp_core99_kick_cpu, .setup_cpu = smp_core99_setup_cpu, .give_timebase = smp_core99_give_timebase, From 048c9374a749a27f16493cea033fa4a8ff492356 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 09:52:22 +0200 Subject: [PATCH 184/200] block: Enhance new plugging support to support general callbacks md/raid requires an unplug callback, but as it does not uses requests the current code cannot provide one. So allow arbitrary callbacks to be attached to the blk_plug. Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 20 ++++++++++++++++++++ include/linux/blkdev.h | 7 ++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 78b7b0cb7216..77edf0512338 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2638,6 +2638,7 @@ void blk_start_plug(struct blk_plug *plug) plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); + INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; /* @@ -2678,6 +2679,24 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, q->unplugged_fn(q); } +static void flush_plug_callbacks(struct blk_plug *plug) +{ + LIST_HEAD(callbacks); + + if (list_empty(&plug->cb_list)) + return; + + list_splice_init(&plug->cb_list, &callbacks); + + while (!list_empty(&callbacks)) { + struct blk_plug_cb *cb = list_first_entry(&callbacks, + struct blk_plug_cb, + list); + list_del(&cb->list); + cb->callback(cb); + } +} + void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; @@ -2688,6 +2707,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(plug->magic != PLUG_MAGIC); + flush_plug_callbacks(plug); if (list_empty(&plug->list)) return; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec0357d8c4a5..f3f7879391a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,8 +860,13 @@ extern void blk_put_queue(struct request_queue *); struct blk_plug { unsigned long magic; struct list_head list; + struct list_head cb_list; unsigned int should_sort; }; +struct blk_plug_cb { + struct list_head list; + void (*callback)(struct blk_plug_cb *); +}; extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); @@ -887,7 +892,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - return plug && !list_empty(&plug->list); + return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); } /* From b4cb290e0a7d19235bd075c2ad4d60dbab0bac15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2011 09:54:05 +0200 Subject: [PATCH 185/200] Revert "block: add callback function for unplug notification" MD can't use this since it really requires us to be able to keep more than a single piece of state for the unplug. Commit 048c9374 added the required support for MD, so get rid of this now unused code. This reverts commit f75664570d8b75469cc468f23c2b27220984983b. Conflicts: block/blk-core.c Signed-off-by: Jens Axboe --- block/blk-core.c | 3 --- block/blk-settings.c | 16 ---------------- include/linux/blkdev.h | 3 --- 3 files changed, 22 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 77edf0512338..09b262811fff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2674,9 +2674,6 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, { trace_block_unplug(q, depth, !from_schedule); __blk_run_queue(q, from_schedule); - - if (q->unplugged_fn) - q->unplugged_fn(q); } static void flush_plug_callbacks(struct blk_plug *plug) diff --git a/block/blk-settings.c b/block/blk-settings.c index eb949045bb12..1fa769293597 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -790,22 +790,6 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush) } EXPORT_SYMBOL_GPL(blk_queue_flush); -/** - * blk_queue_unplugged - register a callback for an unplug event - * @q: the request queue for the device - * @fn: the function to call - * - * Some stacked drivers may need to know when IO is dispatched on an - * unplug event. By registrering a callback here, they will be notified - * when someone flushes their on-stack queue plug. The function will be - * called with the queue lock held. - */ -void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn) -{ - q->unplugged_fn = fn; -} -EXPORT_SYMBOL(blk_queue_unplugged); - static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3f7879391a7..3448d89297e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); -typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -284,7 +283,6 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; - unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -843,7 +841,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); -extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); From 99e22598e9a8e0a996d69c8c0f6b7027cb57720a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2011 09:59:55 +0200 Subject: [PATCH 186/200] block: drop queue lock before calling __blk_run_queue() for kblockd punt If we know we are going to punt to kblockd, we can drop the queue lock before calling into __blk_run_queue() since it only does a safe bit test and a workqueue call. Since kblockd needs to grab this very lock as one of the first things it does, it's a good optimization to drop the lock before waking kblockd. Signed-off-by: Jens Axboe --- block/blk-core.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 09b262811fff..5e413933bc3a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -295,7 +295,8 @@ EXPORT_SYMBOL(blk_sync_queue); * * Description: * See @blk_run_queue. This variant must be called with the queue lock - * held and interrupts disabled. + * held and interrupts disabled. If force_kblockd is true, then it is + * safe to call this without holding the queue lock. * */ void __blk_run_queue(struct request_queue *q, bool force_kblockd) @@ -2671,9 +2672,23 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) */ static void queue_unplugged(struct request_queue *q, unsigned int depth, bool from_schedule) + __releases(q->queue_lock) { trace_block_unplug(q, depth, !from_schedule); - __blk_run_queue(q, from_schedule); + + /* + * If we are punting this to kblockd, then we can safely drop + * the queue_lock before waking kblockd (which needs to take + * this lock). + */ + if (from_schedule) { + spin_unlock(q->queue_lock); + __blk_run_queue(q, true); + } else { + __blk_run_queue(q, false); + spin_unlock(q->queue_lock); + } + } static void flush_plug_callbacks(struct blk_plug *plug) @@ -2729,10 +2744,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); BUG_ON(!rq->q); if (rq->q != q) { - if (q) { + /* + * This drops the queue lock + */ + if (q) queue_unplugged(q, depth, from_schedule); - spin_unlock(q->queue_lock); - } q = rq->q; depth = 0; spin_lock(q->queue_lock); @@ -2750,10 +2766,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) depth++; } - if (q) { + /* + * This drops the queue lock + */ + if (q) queue_unplugged(q, depth, from_schedule); - spin_unlock(q->queue_lock); - } local_irq_restore(flags); } From e1dfa0a29737142c32f00a3bac0f609dc85b4a82 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:41 +1000 Subject: [PATCH 187/200] md: use new plugging interface for RAID IO. md/raid submits a lot of IO from the various raid threads. So adding start/finish plug calls to those so that some plugging happens. Signed-off-by: NeilBrown --- drivers/md/raid1.c | 5 ++++- drivers/md/raid10.c | 3 +++ drivers/md/raid5.c | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c2a21ae56d97..5449cd5e3141 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1516,9 +1516,11 @@ static void raid1d(mddev_t *mddev) conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; mdk_rdev_t *rdev; + struct blk_plug plug; md_check_recovery(mddev); - + + blk_start_plug(&plug); for (;;) { char b[BDEVNAME_SIZE]; @@ -1593,6 +1595,7 @@ static void raid1d(mddev_t *mddev) } cond_resched(); } + blk_finish_plug(&plug); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2da83d566592..c8e5dac5d695 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1640,9 +1640,11 @@ static void raid10d(mddev_t *mddev) conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; mdk_rdev_t *rdev; + struct blk_plug plug; md_check_recovery(mddev); + blk_start_plug(&plug); for (;;) { char b[BDEVNAME_SIZE]; @@ -1716,6 +1718,7 @@ static void raid10d(mddev_t *mddev) } cond_resched(); } + blk_finish_plug(&plug); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e867ee42b152..ce6960b1c682 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4478,11 +4478,13 @@ static void raid5d(mddev_t *mddev) struct stripe_head *sh; raid5_conf_t *conf = mddev->private; int handled; + struct blk_plug plug; pr_debug("+++ raid5d active\n"); md_check_recovery(mddev); + blk_start_plug(&plug); handled = 0; spin_lock_irq(&conf->device_lock); while (1) { @@ -4525,6 +4527,7 @@ static void raid5d(mddev_t *mddev) spin_unlock_irq(&conf->device_lock); async_tx_issue_pending_all(); + blk_finish_plug(&plug); pr_debug("--- raid5d inactive\n"); } From af1db72d8b340f97ad12b60175afdef43e6f0e60 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:41 +1000 Subject: [PATCH 188/200] md/dm - remove remains of plug_fn callback. Now that unplugging is done differently, the unplug_fn callback is never called, so it can be completely discarded. Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 8 -------- include/linux/device-mapper.h | 1 - 2 files changed, 9 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5ef136cdba91..e5d8904fc8f6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) return md_raid5_congested(&rs->md, bits); } -static void raid_unplug(struct dm_target_callbacks *cb) -{ - struct raid_set *rs = container_of(cb, struct raid_set, callbacks); - - md_raid5_kick_device(rs->md.private); -} - /* * Construct a RAID4/5/6 mapping: * Args: @@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) } rs->callbacks.congested_fn = raid_is_congested; - rs->callbacks.unplug_fn = raid_unplug; dm_table_add_target_callbacks(ti->table, &rs->callbacks); return 0; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e2768834f397..32a4423710f5 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -197,7 +197,6 @@ struct dm_target { struct dm_target_callbacks { struct list_head list; int (*congested_fn) (struct dm_target_callbacks *, int); - void (*unplug_fn)(struct dm_target_callbacks *); }; int dm_register_target(struct target_type *t); From 482c083492ddaa32ef5864bae3d143dc8bcdf7d1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:42 +1000 Subject: [PATCH 189/200] md - remove old plugging code. md has some plugging infrastructure for RAID5 to use because the normal plugging infrastructure required a 'request_queue', and when called from dm, RAID5 doesn't have one of those available. This relied on the ->unplug_fn callback which doesn't exist any more. So remove all of that code, both in md and raid5. Subsequent patches with restore the plugging functionality. Signed-off-by: NeilBrown --- drivers/md/md.c | 51 ---------------------------------------------- drivers/md/md.h | 22 -------------------- drivers/md/raid5.c | 37 ++++++++------------------------- drivers/md/raid5.h | 2 -- 4 files changed, 8 insertions(+), 104 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index b12b3776c0c0..fb11170c717e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -445,50 +445,6 @@ void md_flush_request(mddev_t *mddev, struct bio *bio) } EXPORT_SYMBOL(md_flush_request); -/* Support for plugging. - * This mirrors the plugging support in request_queue, but does not - * require having a whole queue - */ -static void plugger_work(struct work_struct *work) -{ - struct plug_handle *plug = - container_of(work, struct plug_handle, unplug_work); - plug->unplug_fn(plug); -} -static void plugger_timeout(unsigned long data) -{ - struct plug_handle *plug = (void *)data; - kblockd_schedule_work(NULL, &plug->unplug_work); -} -void plugger_init(struct plug_handle *plug, - void (*unplug_fn)(struct plug_handle *)) -{ - plug->unplug_flag = 0; - plug->unplug_fn = unplug_fn; - init_timer(&plug->unplug_timer); - plug->unplug_timer.function = plugger_timeout; - plug->unplug_timer.data = (unsigned long)plug; - INIT_WORK(&plug->unplug_work, plugger_work); -} -EXPORT_SYMBOL_GPL(plugger_init); - -void plugger_set_plug(struct plug_handle *plug) -{ - if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) - mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); -} -EXPORT_SYMBOL_GPL(plugger_set_plug); - -int plugger_remove_plug(struct plug_handle *plug) -{ - if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { - del_timer(&plug->unplug_timer); - return 1; - } else - return 0; -} -EXPORT_SYMBOL_GPL(plugger_remove_plug); - static inline mddev_t *mddev_get(mddev_t *mddev) { @@ -4723,7 +4679,6 @@ static void md_clean(mddev_t *mddev) mddev->bitmap_info.chunksize = 0; mddev->bitmap_info.daemon_sleep = 0; mddev->bitmap_info.max_write_behind = 0; - mddev->plug = NULL; } static void __md_stop_writes(mddev_t *mddev) @@ -6688,12 +6643,6 @@ int md_allow_write(mddev_t *mddev) } EXPORT_SYMBOL_GPL(md_allow_write); -void md_unplug(mddev_t *mddev) -{ - if (mddev->plug) - mddev->plug->unplug_fn(mddev->plug); -} - #define SYNC_MARKS 10 #define SYNC_MARK_STEP (3*HZ) void md_do_sync(mddev_t *mddev) diff --git a/drivers/md/md.h b/drivers/md/md.h index 52b407369e13..fad90228672f 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -29,26 +29,6 @@ typedef struct mddev_s mddev_t; typedef struct mdk_rdev_s mdk_rdev_t; -/* generic plugging support - like that provided with request_queue, - * but does not require a request_queue - */ -struct plug_handle { - void (*unplug_fn)(struct plug_handle *); - struct timer_list unplug_timer; - struct work_struct unplug_work; - unsigned long unplug_flag; -}; -#define PLUGGED_FLAG 1 -void plugger_init(struct plug_handle *plug, - void (*unplug_fn)(struct plug_handle *)); -void plugger_set_plug(struct plug_handle *plug); -int plugger_remove_plug(struct plug_handle *plug); -static inline void plugger_flush(struct plug_handle *plug) -{ - del_timer_sync(&plug->unplug_timer); - cancel_work_sync(&plug->unplug_work); -} - /* * MD's 'extended' device */ @@ -336,7 +316,6 @@ struct mddev_s struct list_head all_mddevs; struct attribute_group *to_remove; - struct plug_handle *plug; /* if used by personality */ struct bio_set *bio_set; @@ -516,7 +495,6 @@ extern int md_integrity_register(mddev_t *mddev); extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); extern void restore_bitmap_write_access(struct file *file); -extern void md_unplug(mddev_t *mddev); extern void mddev_init(mddev_t *mddev); extern int md_run(mddev_t *mddev); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ce6960b1c682..a1755a6a3e5f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -199,14 +199,12 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) BUG_ON(!list_empty(&sh->lru)); BUG_ON(atomic_read(&conf->active_stripes)==0); if (test_bit(STRIPE_HANDLE, &sh->state)) { - if (test_bit(STRIPE_DELAYED, &sh->state)) { + if (test_bit(STRIPE_DELAYED, &sh->state)) list_add_tail(&sh->lru, &conf->delayed_list); - plugger_set_plug(&conf->plug); - } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && - sh->bm_seq - conf->seq_write > 0) { + else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && + sh->bm_seq - conf->seq_write > 0) list_add_tail(&sh->lru, &conf->bitmap_list); - plugger_set_plug(&conf->plug); - } else { + else { clear_bit(STRIPE_BIT_DELAY, &sh->state); list_add_tail(&sh->lru, &conf->handle_list); } @@ -461,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), conf->device_lock, - md_raid5_kick_device(conf)); + md_wakeup_thread(conf->mddev->thread)); conf->inactive_blocked = 0; } else init_stripe(sh, sector, previous); @@ -1470,7 +1468,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) wait_event_lock_irq(conf->wait_for_stripe, !list_empty(&conf->inactive_list), conf->device_lock, - blk_flush_plug(current)); + ); osh = get_free_stripe(conf); spin_unlock_irq(&conf->device_lock); atomic_set(&nsh->count, 1); @@ -3623,8 +3621,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) atomic_inc(&conf->preread_active_stripes); list_add_tail(&sh->lru, &conf->hold_list); } - } else - plugger_set_plug(&conf->plug); + } } static void activate_bit_delay(raid5_conf_t *conf) @@ -3641,21 +3638,6 @@ static void activate_bit_delay(raid5_conf_t *conf) } } -void md_raid5_kick_device(raid5_conf_t *conf) -{ - blk_flush_plug(current); - raid5_activate_delayed(conf); - md_wakeup_thread(conf->mddev->thread); -} -EXPORT_SYMBOL_GPL(md_raid5_kick_device); - -static void raid5_unplug(struct plug_handle *plug) -{ - raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); - - md_raid5_kick_device(conf); -} - int md_raid5_congested(mddev_t *mddev, int bits) { raid5_conf_t *conf = mddev->private; @@ -4057,7 +4039,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) * add failed due to overlap. Flush everything * and wait a while */ - md_raid5_kick_device(conf); + md_wakeup_thread(mddev->thread); release_stripe(sh); schedule(); goto retry; @@ -5144,8 +5126,6 @@ static int run(mddev_t *mddev) mdname(mddev)); md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - plugger_init(&conf->plug, raid5_unplug); - mddev->plug = &conf->plug; if (mddev->queue) { int chunk_size; /* read-ahead size must cover two whole stripes, which @@ -5195,7 +5175,6 @@ static int stop(mddev_t *mddev) mddev->thread = NULL; if (mddev->queue) mddev->queue->backing_dev_info.congested_fn = NULL; - plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ free_conf(conf); mddev->private = NULL; mddev->to_remove = &raid5_attrs_group; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 8d563a4f022a..3ca77a2613ba 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -400,8 +400,6 @@ struct raid5_private_data { * Cleared when a sync completes. */ - struct plug_handle plug; - /* per cpu variables */ struct raid5_percpu { struct page *spare_page; /* Used when checking P/Q in raid6 */ From 97658cdd3af7d01461874c93b89afa4a2465e7c6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:42 +1000 Subject: [PATCH 190/200] md: provide generic support for handling unplug callbacks. When an md device adds a request to a queue, it can call mddev_check_plugged. If this succeeds then we know that the md thread will be woken up shortly, and ->plug_cnt will be non-zero until then, so some processing can be delayed. If it fails, then no unplug callback is expected and the make_request function needs to do whatever is required to make the request happen. Signed-off-by: NeilBrown --- drivers/md/md.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/md.h | 4 ++++ 2 files changed, 60 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index fb11170c717e..6e853c61d87e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -445,6 +445,61 @@ void md_flush_request(mddev_t *mddev, struct bio *bio) } EXPORT_SYMBOL(md_flush_request); +/* Support for plugging. + * This mirrors the plugging support in request_queue, but does not + * require having a whole queue or request structures. + * We allocate an md_plug_cb for each md device and each thread it gets + * plugged on. This links tot the private plug_handle structure in the + * personality data where we keep a count of the number of outstanding + * plugs so other code can see if a plug is active. + */ +struct md_plug_cb { + struct blk_plug_cb cb; + mddev_t *mddev; +}; + +static void plugger_unplug(struct blk_plug_cb *cb) +{ + struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb); + if (atomic_dec_and_test(&mdcb->mddev->plug_cnt)) + md_wakeup_thread(mdcb->mddev->thread); + kfree(mdcb); +} + +/* Check that an unplug wakeup will come shortly. + * If not, wakeup the md thread immediately + */ +int mddev_check_plugged(mddev_t *mddev) +{ + struct blk_plug *plug = current->plug; + struct md_plug_cb *mdcb; + + if (!plug) + return 0; + + list_for_each_entry(mdcb, &plug->cb_list, cb.list) { + if (mdcb->cb.callback == plugger_unplug && + mdcb->mddev == mddev) { + /* Already on the list, move to top */ + if (mdcb != list_first_entry(&plug->cb_list, + struct md_plug_cb, + cb.list)) + list_move(&mdcb->cb.list, &plug->cb_list); + return 1; + } + } + /* Not currently on the callback list */ + mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC); + if (!mdcb) + return 0; + + mdcb->mddev = mddev; + mdcb->cb.callback = plugger_unplug; + atomic_inc(&mddev->plug_cnt); + list_add(&mdcb->cb.list, &plug->cb_list); + return 1; +} +EXPORT_SYMBOL_GPL(mddev_check_plugged); static inline mddev_t *mddev_get(mddev_t *mddev) { @@ -494,6 +549,7 @@ void mddev_init(mddev_t *mddev) atomic_set(&mddev->active, 1); atomic_set(&mddev->openers, 0); atomic_set(&mddev->active_io, 0); + atomic_set(&mddev->plug_cnt, 0); spin_lock_init(&mddev->write_lock); atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); diff --git a/drivers/md/md.h b/drivers/md/md.h index fad90228672f..0b1fd3f1d85b 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -179,6 +179,9 @@ struct mddev_s int delta_disks, new_level, new_layout; int new_chunk_sectors; + atomic_t plug_cnt; /* If device is expecting + * more bios soon. + */ struct mdk_thread_s *thread; /* management thread */ struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ sector_t curr_resync; /* last block scheduled */ @@ -508,4 +511,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, mddev_t *mddev); extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, mddev_t *mddev); +extern int mddev_check_plugged(mddev_t *mddev); #endif /* _MD_MD_H */ From 7c13edc87510f665da3094174e1fd633e06649f4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:43 +1000 Subject: [PATCH 191/200] md: incorporate new plugging into raid5. In raid5 plugging is used for 2 things: 1/ collecting writes that require a bitmap update 2/ collecting writes in the hope that we can create full stripes - or at least more-full. We now release these different sets of stripes when plug_cnt is zero. Also in make_request, we call mddev_check_plug to hopefully increase plug_cnt, and wake up the thread at the end if plugging wasn't achieved for some reason. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a1755a6a3e5f..f301e6ae220c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -27,12 +27,12 @@ * * We group bitmap updates into batches. Each batch has a number. * We may write out several batches at once, but that isn't very important. - * conf->bm_write is the number of the last batch successfully written. - * conf->bm_flush is the number of the last batch that was closed to + * conf->seq_write is the number of the last batch successfully written. + * conf->seq_flush is the number of the last batch that was closed to * new additions. * When we discover that we will need to write to any block in a stripe * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq - * the number of the batch it will be in. This is bm_flush+1. + * the number of the batch it will be in. This is seq_flush+1. * When we are ready to do a write, if that batch hasn't been written yet, * we plug the array and queue the stripe for later. * When an unplug happens, we increment bm_flush, thus closing the current @@ -459,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), conf->device_lock, - md_wakeup_thread(conf->mddev->thread)); + ); conf->inactive_blocked = 0; } else init_stripe(sh, sector, previous); @@ -3927,6 +3927,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) struct stripe_head *sh; const int rw = bio_data_dir(bi); int remaining; + int plugged; if (unlikely(bi->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bi); @@ -3945,6 +3946,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) bi->bi_next = NULL; bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ + plugged = mddev_check_plugged(mddev); for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { DEFINE_WAIT(w); int disks, data_disks; @@ -4059,6 +4061,9 @@ static int make_request(mddev_t *mddev, struct bio * bi) } } + if (!plugged) + md_wakeup_thread(mddev->thread); + spin_lock_irq(&conf->device_lock); remaining = raid5_dec_bi_phys_segments(bi); spin_unlock_irq(&conf->device_lock); @@ -4472,14 +4477,18 @@ static void raid5d(mddev_t *mddev) while (1) { struct bio *bio; - if (conf->seq_flush != conf->seq_write) { - int seq = conf->seq_flush; + if (atomic_read(&mddev->plug_cnt) == 0 && + !list_empty(&conf->bitmap_list)) { + /* Now is a good time to flush some bitmap updates */ + conf->seq_flush++; spin_unlock_irq(&conf->device_lock); bitmap_unplug(mddev->bitmap); spin_lock_irq(&conf->device_lock); - conf->seq_write = seq; + conf->seq_write = conf->seq_flush; activate_bit_delay(conf); } + if (atomic_read(&mddev->plug_cnt) == 0) + raid5_activate_delayed(conf); while ((bio = remove_bio_from_retry(conf))) { int ok; From c3b328ac846bcf6b9a62c5563380a81ab723006d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 18:25:43 +1000 Subject: [PATCH 192/200] md: fix up raid1/raid10 unplugging. We just need to make sure that an unplug event wakes up the md thread, which is exactly what mddev_check_plugged does. Also remove some plug-related code that is no longer needed. Signed-off-by: NeilBrown --- drivers/md/raid1.c | 24 ++++++++++-------------- drivers/md/raid10.c | 24 ++++++++++-------------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5449cd5e3141..2b7a7ff401dc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -565,12 +565,6 @@ static void flush_pending_writes(conf_t *conf) spin_unlock_irq(&conf->device_lock); } -static void md_kick_device(mddev_t *mddev) -{ - blk_flush_plug(current); - md_wakeup_thread(mddev->thread); -} - /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -600,7 +594,7 @@ static void raise_barrier(conf_t *conf) /* Wait until no block IO is waiting */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, - conf->resync_lock, md_kick_device(conf->mddev)); + conf->resync_lock, ); /* block any new IO from starting */ conf->barrier++; @@ -608,7 +602,7 @@ static void raise_barrier(conf_t *conf) /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, md_kick_device(conf->mddev)); + conf->resync_lock, ); spin_unlock_irq(&conf->resync_lock); } @@ -630,7 +624,7 @@ static void wait_barrier(conf_t *conf) conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, !conf->barrier, conf->resync_lock, - md_kick_device(conf->mddev)); + ); conf->nr_waiting--; } conf->nr_pending++; @@ -666,8 +660,7 @@ static void freeze_array(conf_t *conf) wait_event_lock_irq(conf->wait_barrier, conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - ({ flush_pending_writes(conf); - md_kick_device(conf->mddev); })); + flush_pending_writes(conf)); spin_unlock_irq(&conf->resync_lock); } static void unfreeze_array(conf_t *conf) @@ -729,6 +722,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); mdk_rdev_t *blocked_rdev; + int plugged; /* * Register the new request and wait if the reconstruction @@ -820,6 +814,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) * inc refcount on their rdev. Record them by setting * bios[x] to bio */ + plugged = mddev_check_plugged(mddev); + disks = conf->raid_disks; retry_write: blocked_rdev = NULL; @@ -925,7 +921,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* In case raid1d snuck in to freeze_array */ wake_up(&conf->wait_barrier); - if (do_sync || !bitmap) + if (do_sync || !bitmap || !plugged) md_wakeup_thread(mddev->thread); return 0; @@ -1524,7 +1520,8 @@ static void raid1d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - flush_pending_writes(conf); + if (atomic_read(&mddev->plug_cnt) == 0) + flush_pending_writes(conf); spin_lock_irqsave(&conf->device_lock, flags); if (list_empty(head)) { @@ -2042,7 +2039,6 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c8e5dac5d695..8e9462626ec5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -634,12 +634,6 @@ static void flush_pending_writes(conf_t *conf) spin_unlock_irq(&conf->device_lock); } -static void md_kick_device(mddev_t *mddev) -{ - blk_flush_plug(current); - md_wakeup_thread(mddev->thread); -} - /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -669,15 +663,15 @@ static void raise_barrier(conf_t *conf, int force) /* Wait until no block IO is waiting (unless 'force') */ wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, - conf->resync_lock, md_kick_device(conf->mddev)); + conf->resync_lock, ); /* block any new IO from starting */ conf->barrier++; - /* No wait for all pending IO to complete */ + /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, md_kick_device(conf->mddev)); + conf->resync_lock, ); spin_unlock_irq(&conf->resync_lock); } @@ -698,7 +692,7 @@ static void wait_barrier(conf_t *conf) conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, !conf->barrier, conf->resync_lock, - md_kick_device(conf->mddev)); + ); conf->nr_waiting--; } conf->nr_pending++; @@ -734,8 +728,8 @@ static void freeze_array(conf_t *conf) wait_event_lock_irq(conf->wait_barrier, conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - ({ flush_pending_writes(conf); - md_kick_device(conf->mddev); })); + flush_pending_writes(conf)); + spin_unlock_irq(&conf->resync_lock); } @@ -762,6 +756,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) const unsigned long do_fua = (bio->bi_rw & REQ_FUA); unsigned long flags; mdk_rdev_t *blocked_rdev; + int plugged; if (unlikely(bio->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bio); @@ -870,6 +865,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) * inc refcount on their rdev. Record them by setting * bios[x] to bio */ + plugged = mddev_check_plugged(mddev); + raid10_find_phys(conf, r10_bio); retry_write: blocked_rdev = NULL; @@ -946,9 +943,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); - if (do_sync || !mddev->bitmap) + if (do_sync || !mddev->bitmap || !plugged) md_wakeup_thread(mddev->thread); - return 0; } From 4521cc4ed5173f92714f6999a69910c3385fed68 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2011 11:36:39 +0200 Subject: [PATCH 193/200] block: blk_delay_queue() should use kblockd workqueue Reported-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 5e413933bc3a..e2bacfa46cc3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -220,7 +220,8 @@ static void blk_delay_work(struct work_struct *work) */ void blk_delay_queue(struct request_queue *q, unsigned long msecs) { - schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs)); + queue_delayed_work(kblockd_workqueue, &q->delay_work, + msecs_to_jiffies(msecs)); } EXPORT_SYMBOL(blk_delay_queue); From 24ecfbe27f65563909b14492afda2f1c21f7c044 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Apr 2011 11:41:33 +0200 Subject: [PATCH 194/200] block: add blk_run_queue_async Instead of overloading __blk_run_queue to force an offload to kblockd add a new blk_run_queue_async helper to do it explicitly. I've kept the blk_queue_stopped check for now, but I suspect it's not needed as the check we do when the workqueue items runs should be enough. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 36 +++++++++++++++++++++----------- block/blk-exec.c | 2 +- block/blk-flush.c | 4 ++-- block/blk.h | 1 + block/cfq-iosched.c | 6 +++--- block/elevator.c | 4 ++-- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_transport_fc.c | 2 +- include/linux/blkdev.h | 2 +- 9 files changed, 36 insertions(+), 23 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index e2bacfa46cc3..5fa3dd2705c6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -204,7 +204,7 @@ static void blk_delay_work(struct work_struct *work) q = container_of(work, struct request_queue, delay_work.work); spin_lock_irq(q->queue_lock); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); } @@ -239,7 +239,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q, false); + __blk_run_queue(q); } EXPORT_SYMBOL(blk_start_queue); @@ -296,11 +296,9 @@ EXPORT_SYMBOL(blk_sync_queue); * * Description: * See @blk_run_queue. This variant must be called with the queue lock - * held and interrupts disabled. If force_kblockd is true, then it is - * safe to call this without holding the queue lock. - * + * held and interrupts disabled. */ -void __blk_run_queue(struct request_queue *q, bool force_kblockd) +void __blk_run_queue(struct request_queue *q) { if (unlikely(blk_queue_stopped(q))) return; @@ -309,7 +307,7 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { q->request_fn(q); queue_flag_clear(QUEUE_FLAG_REENTER, q); } else @@ -317,6 +315,20 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) } EXPORT_SYMBOL(__blk_run_queue); +/** + * blk_run_queue_async - run a single device queue in workqueue context + * @q: The queue to run + * + * Description: + * Tells kblockd to perform the equivalent of @blk_run_queue on behalf + * of us. + */ +void blk_run_queue_async(struct request_queue *q) +{ + if (likely(!blk_queue_stopped(q))) + queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); +} + /** * blk_run_queue - run a single device queue * @q: The queue to run @@ -330,7 +342,7 @@ void blk_run_queue(struct request_queue *q) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -979,7 +991,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, blk_queue_end_tag(q, rq); add_acct_request(q, rq, where); - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); @@ -1323,7 +1335,7 @@ get_rq: } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); - __blk_run_queue(q, false); + __blk_run_queue(q); out_unlock: spin_unlock_irq(q->queue_lock); } @@ -2684,9 +2696,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, */ if (from_schedule) { spin_unlock(q->queue_lock); - __blk_run_queue(q, true); + blk_run_queue_async(q); } else { - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock(q->queue_lock); } diff --git a/block/blk-exec.c b/block/blk-exec.c index 7482b7fa863b..81e31819a597 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); __elv_add_request(q, rq, where); - __blk_run_queue(q, false); + __blk_run_queue(q); /* the queue is stopped so it won't be plugged+unplugged */ if (rq->cmd_type == REQ_TYPE_PM_RESUME) q->request_fn(q); diff --git a/block/blk-flush.c b/block/blk-flush.c index eba4a2790c6c..6c9b5e189e62 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error) * request_fn may confuse the driver. Always use kblockd. */ if (queued) - __blk_run_queue(q, true); + blk_run_queue_async(q); } /** @@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error) * the comment in flush_end_io(). */ if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) - __blk_run_queue(q, true); + blk_run_queue_async(q); } /** diff --git a/block/blk.h b/block/blk.h index 61263463e38e..c9df8fc3c999 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,6 +22,7 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); +void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3be881ec95ad..46b0a1d1d925 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3368,7 +3368,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->busy_queues > 1) { cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); } else { cfq_blkiocg_update_idle_time_stats( &cfqq->cfqg->blkg); @@ -3383,7 +3383,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); } } @@ -3743,7 +3743,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - __blk_run_queue(cfqd->queue, false); + __blk_run_queue(cfqd->queue); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 0cdb4e7ebab4..6f6abc08bb56 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - __blk_run_queue(q, false); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -695,7 +695,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. */ - __blk_run_queue(q, false); + __blk_run_queue(q); break; case ELEVATOR_INSERT_SORT_MERGE: diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6d5c7ff43f5b..ab55c2fa7ce2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q) &sdev->request_queue->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue, false); + __blk_run_queue(sdev->request_queue); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); spin_unlock(sdev->request_queue->queue_lock); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index fdf3fa639056..28c33506e4ad 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport) !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); if (flagset) queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q, false); + __blk_run_queue(rport->rqst_q); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3448d89297e8..cbbfd98ad4a3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -697,7 +697,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q, bool force_kblockd); +extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, From f65647c29b14f5a32ff6f3237b0ef3b375ed5a79 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Apr 2011 08:55:34 -0400 Subject: [PATCH 195/200] Btrfs: fix free space cache leak The free space caching code was recently reworked to cache all the pages it needed instead of using find_get_page everywhere. One loop was missed though, so it ended up leaking pages. This fixes it to use our page array instead of find_get_page. Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a3f420def0e9..11d2e9cea09e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -732,7 +732,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, out_of_space = true; break; } - page = find_get_page(inode->i_mapping, index); + page = pages[index]; addr = kmap(page); memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); From 80b4895aa4578e9372d76cd4063f82d0c3994d77 Mon Sep 17 00:00:00 2001 From: Jeff Brown Date: Mon, 18 Apr 2011 10:08:02 -0700 Subject: [PATCH 196/200] Input: estimate number of events per packet Calculate a default based on the number of ABS axes, REL axes, and MT slots for the device during input device registration. Signed-off-by: Jeff Brown Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/input/mt.h | 6 ++++++ 2 files changed, 46 insertions(+) diff --git a/drivers/input/input.c b/drivers/input/input.c index d6e8bd8a851c..ebbceedc92f4 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1746,6 +1746,42 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int } EXPORT_SYMBOL(input_set_capability); +static unsigned int input_estimate_events_per_packet(struct input_dev *dev) +{ + int mt_slots; + int i; + unsigned int events; + + if (dev->mtsize) { + mt_slots = dev->mtsize; + } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { + mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - + dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1, + clamp(mt_slots, 2, 32); + } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { + mt_slots = 2; + } else { + mt_slots = 0; + } + + events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ + + for (i = 0; i < ABS_CNT; i++) { + if (test_bit(i, dev->absbit)) { + if (input_is_mt_axis(i)) + events += mt_slots; + else + events++; + } + } + + for (i = 0; i < REL_CNT; i++) + if (test_bit(i, dev->relbit)) + events++; + + return events; +} + #define INPUT_CLEANSE_BITMASK(dev, type, bits) \ do { \ if (!test_bit(EV_##type, dev->evbit)) \ @@ -1793,6 +1829,10 @@ int input_register_device(struct input_dev *dev) /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); + if (!dev->hint_events_per_packet) + dev->hint_events_per_packet = + input_estimate_events_per_packet(dev); + /* * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index b3ac06a4435d..318bb82325a6 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -48,6 +48,12 @@ static inline void input_mt_slot(struct input_dev *dev, int slot) input_event(dev, EV_ABS, ABS_MT_SLOT, slot); } +static inline bool input_is_mt_axis(int axis) +{ + return axis == ABS_MT_SLOT || + (axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST); +} + void input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); From c36b58e8a9112017c2bcc322cc98e71241814303 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 18 Apr 2011 10:17:17 -0700 Subject: [PATCH 197/200] Input: xen-kbdfront - fix mouse getting stuck after save/restore Mouse gets "stuck" after restore of PV guest but buttons are in working condition. If driver has been configured for ABS coordinates at start it will get XENKBD_TYPE_POS events and then suddenly after restore it'll start getting XENKBD_TYPE_MOTION events, that will be dropped later and they won't get into user-space. Regression was introduced by hunk 5 and 6 of 5ea5254aa0ad269cfbd2875c973ef25ab5b5e9db ("Input: xen-kbdfront - advertise either absolute or relative coordinates"). Driver on restore should ask xen for request-abs-pointer again if it is available. So restore parts that did it before 5ea5254. Acked-by: Olaf Hering Signed-off-by: Igor Mammedov [v1: Expanded the commit description] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Dmitry Torokhov --- drivers/input/misc/xen-kbdfront.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 7077f9bf5ead..62bae99424e6 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -303,7 +303,7 @@ static void xenkbd_backend_changed(struct xenbus_device *dev, enum xenbus_state backend_state) { struct xenkbd_info *info = dev_get_drvdata(&dev->dev); - int val; + int ret, val; switch (backend_state) { case XenbusStateInitialising: @@ -316,6 +316,17 @@ static void xenkbd_backend_changed(struct xenbus_device *dev, case XenbusStateInitWait: InitWait: + ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-abs-pointer", "%d", &val); + if (ret < 0) + val = 0; + if (val) { + ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, + "request-abs-pointer", "1"); + if (ret) + pr_warning("xenkbd: can't request abs-pointer"); + } + xenbus_switch_state(dev, XenbusStateConnected); break; From c78193e9c7bcbf25b8237ad0dec82f805c4ea69b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 Apr 2011 10:35:30 -0700 Subject: [PATCH 198/200] next_pidmap: fix overflow condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next_pidmap() just quietly accepted whatever 'last' pid that was passed in, which is not all that safe when one of the users is /proc. Admittedly the proc code should do some sanity checking on the range (and that will be the next commit), but that doesn't mean that the helper functions should just do that pidmap pointer arithmetic without checking the range of its arguments. So clamp 'last' to PID_MAX_LIMIT. The fact that we then do "last+1" doesn't really matter, the for-loop does check against the end of the pidmap array properly (it's only the actual pointer arithmetic overflow case we need to worry about, and going one bit beyond isn't going to overflow). [ Use PID_MAX_LIMIT rather than pid_max as per Eric Biederman ] Reported-by: Tavis Ormandy Analyzed-by: Robert Święcki Cc: Eric W. Biederman Cc: Pavel Emelyanov Signed-off-by: Linus Torvalds --- include/linux/pid.h | 2 +- kernel/pid.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/pid.h b/include/linux/pid.h index 31afb7ecbe1f..cdced84261d7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -117,7 +117,7 @@ extern struct pid *find_vpid(int nr); */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -int next_pidmap(struct pid_namespace *pid_ns, int last); +int next_pidmap(struct pid_namespace *pid_ns, unsigned int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); diff --git a/kernel/pid.c b/kernel/pid.c index 02f221274265..57a8346a270e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) return -1; } -int next_pidmap(struct pid_namespace *pid_ns, int last) +int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) { int offset; struct pidmap *map, *end; + if (last >= PID_MAX_LIMIT) + return -1; + offset = (last + 1) & BITS_PER_PAGE_MASK; map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; end = &pid_ns->pidmap[PIDMAP_ENTRIES]; From d8bdc59f215e62098bc5b4256fd9928bf27053a1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 Apr 2011 10:36:54 -0700 Subject: [PATCH 199/200] proc: do proper range check on readdir offset Rather than pass in some random truncated offset to the pid-related functions, check that the offset is in range up-front. This is just cleanup, the previous commit fixed the real problem. Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/proc/base.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index dd6628d3ba42..dfa532730e55 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { - unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; - struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); + unsigned int nr; + struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; + if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) + goto out_no_task; + nr = filp->f_pos - FIRST_PROCESS_ENTRY; + + reaper = get_proc_task(filp->f_path.dentry->d_inode); if (!reaper) goto out_no_task; From f0e615c3cb72b42191b558c130409335812621d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 Apr 2011 21:26:00 -0700 Subject: [PATCH 200/200] Linux 2.6.39-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 322e7334ccb9..b967b967572b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 39 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION*