From 9c9a43ed2734081124407c779b36a4761c41139b Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Wed, 5 Jul 2006 23:12:20 +0200 Subject: [PATCH 01/18] [CPUFREQ] return error when failing to set minfreq I just stumbled on this bug/feature, this is how to reproduce it: # echo 450000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq # echo 450000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq # echo powersave > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor # cpufreq-info -p 450000 450000 powersave # echo 1800000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq ; echo $? 0 # cpufreq-info -p 450000 450000 powersave Here it is. The kernel refuses to set a min_freq higher than the max_freq but it allows a max_freq lower than min_freq (lowering min_freq also). This behaviour is pretty straightforward (but undocumented) and it doesn't return an error altough failing to accomplish the requested action (set min_freq). The problem (IMO) is basically that userspace is not allowed to set a full policy atomically while the kernel always does that thus it must enforce an ordering on operations. The attached patch returns -EINVAL if trying to increase frequencies starting from scaling_min_freq and documents the correct ordering of writes. Signed-off-by: Mattia Dongili Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones -- --- Documentation/cpu-freq/user-guide.txt | 5 ++++- drivers/cpufreq/cpufreq.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 7fedc00c3d30..555c8cf3650a 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -153,10 +153,13 @@ scaling_governor, and by "echoing" the name of another that some governors won't load - they only work on some specific architectures or processors. -scaling_min_freq and +scaling_min_freq and scaling_max_freq show the current "policy limits" (in kHz). By echoing new values into these files, you can change these limits. + NOTE: when setting a policy you need to + first set scaling_max_freq, then + scaling_min_freq. If you have selected the "userspace" governor which allows you to diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bc1088d9b379..ad996c772c8b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1343,6 +1343,11 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_poli memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo)); + if (policy->min > data->min && policy->min > policy->max) { + ret = -EINVAL; + goto error_out; + } + /* verify the cpu speed can be set within this limit */ ret = cpufreq_driver->verify(policy); if (ret) From dadb49d8746bc4a4b5a310dabf0c838e57a9b531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Mon, 3 Jul 2006 07:19:05 +0200 Subject: [PATCH 02/18] [CPUFREQ] Longhaul - Hook into ACPI C states. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minimal change necessary for hardware support. Changes in longhaul.c: - most important - now C3 state is causing transition, - code responsible for clearing "bus master" bit removed, - protect bcr2 transition in the same way as longhaul. 
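In both the BCR2 and Longhaul-MSR cases the transition now follows the same basic sequence; condensed from do_powersaver() below (a sketch only - error handling and the BCR2 variant differ in detail):

	safe_halt();				/* sync to the timer tick */
	ACPI_FLUSH_CPU_CACHE();
	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);	/* latch new ratio, applied on next halt/sleep */
	inb(cx->address);			/* read P_LVL3 - enter C3, frequency changes here */
	t = inl(acpi_fadt.xpm_tmr_blk.address);	/* dummy op - must do something after the P_LVL3 read */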
Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/Kconfig | 2 +- arch/i386/kernel/cpu/cpufreq/longhaul.c | 194 +++++++++++++----------- 2 files changed, 109 insertions(+), 87 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index e44a4c6a4fe5..c593d73908f7 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -202,7 +202,7 @@ config X86_LONGRUN config X86_LONGHAUL tristate "VIA Cyrix III Longhaul" select CPU_FREQ_TABLE - depends on BROKEN + depends on ACPI_PROCESSOR help This adds the CPUFreq driver for VIA Samuel/CyrixIII, VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 146f607e9c44..d735cb460612 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,11 +29,13 @@ #include #include #include -#include #include #include #include +#include +#include +#include #include "longhaul.h" @@ -56,6 +58,8 @@ static int minvid, maxvid; static unsigned int minmult, maxmult; static int can_scale_voltage; static int vrmrev; +static struct acpi_processor *pr = NULL; +static struct acpi_processor_cx *cx = NULL; /* Module parameters */ static int dont_scale_voltage; @@ -118,84 +122,64 @@ static int longhaul_get_cpu_mult(void) return eblcr_table[invalue]; } +/* For processor with BCR2 MSR */ -static void do_powersaver(union msr_longhaul *longhaul, - unsigned int clock_ratio_index) +static void do_longhaul1(int cx_address, unsigned int clock_ratio_index) { - struct pci_dev *dev; - unsigned long flags; - unsigned int tmp_mask; - int version; - int i; - u16 pci_cmd; - u16 cmd_state[64]; + union msr_bcr2 bcr2; + u32 t; - switch (cpu_model) { - case CPU_EZRA_T: - version = 3; - break; - case CPU_NEHEMIAH: - version = 0xf; - break; - default: - return; - } - - rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); - longhaul->bits.SoftBusRatio = clock_ratio_index & 0xf; - longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; - longhaul->bits.EnableSoftBusRatio = 1; - longhaul->bits.RevisionKey = 0; - - preempt_disable(); - local_irq_save(flags); - - /* - * get current pci bus master state for all devices - * and clear bus master bit - */ - dev = NULL; - i = 0; - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (dev != NULL) { - pci_read_config_word(dev, PCI_COMMAND, &pci_cmd); - cmd_state[i++] = pci_cmd; - pci_cmd &= ~PCI_COMMAND_MASTER; - pci_write_config_word(dev, PCI_COMMAND, pci_cmd); - } - } while (dev != NULL); - - tmp_mask=inb(0x21); /* works on C3. save mask. 
*/ - outb(0xFE,0x21); /* TMR0 only */ - outb(0xFF,0x80); /* delay */ + rdmsrl(MSR_VIA_BCR2, bcr2.val); + /* Enable software clock multiplier */ + bcr2.bits.ESOFTBF = 1; + bcr2.bits.CLOCKMUL = clock_ratio_index; + /* Sync to timer tick */ safe_halt(); - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - halt(); + ACPI_FLUSH_CPU_CACHE(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_BCR2, bcr2.val); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_fadt.xpm_tmr_blk.address); + /* Disable software clock multiplier */ local_irq_disable(); + rdmsrl(MSR_VIA_BCR2, bcr2.val); + bcr2.bits.ESOFTBF = 0; + wrmsrl(MSR_VIA_BCR2, bcr2.val); +} - outb(tmp_mask,0x21); /* restore mask */ +/* For processor with Longhaul MSR */ - /* restore pci bus master state for all devices */ - dev = NULL; - i = 0; - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (dev != NULL) { - pci_cmd = cmd_state[i++]; - pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); - } - } while (dev != NULL); - local_irq_restore(flags); - preempt_enable(); +static void do_powersaver(int cx_address, unsigned int clock_ratio_index) +{ + union msr_longhaul longhaul; + u32 t; - /* disable bus ratio bit */ - rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); - longhaul->bits.EnableSoftBusRatio = 0; - longhaul->bits.RevisionKey = version; - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; + longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + + /* Sync to timer tick */ + safe_halt(); + ACPI_FLUSH_CPU_CACHE(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_fadt.xpm_tmr_blk.address); + + /* Disable bus ratio bit */ + local_irq_disable(); + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + longhaul.bits.EnableSoftBusRatio = 0; + longhaul.bits.EnableSoftBSEL = 0; + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); } /** @@ -209,9 +193,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index) { int speed, mult; struct cpufreq_freqs freqs; - union msr_longhaul longhaul; - union msr_bcr2 bcr2; static unsigned int old_ratio=-1; + unsigned long flags; + unsigned int pic1_mask, pic2_mask; if (old_ratio == clock_ratio_index) return; @@ -234,6 +218,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index) dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); + preempt_disable(); + local_irq_save(flags); + + pic2_mask = inb(0xA1); + pic1_mask = inb(0x21); /* works on C3. save mask. 
*/ + outb(0xFF,0xA1); /* Overkill */ + outb(0xFE,0x21); /* TMR0 only */ + + /* Disable bus master arbitration */ + if (pr->flags.bm_check) { + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, + ACPI_MTX_DO_NOT_LOCK); + } + switch (longhaul_version) { /* @@ -245,20 +243,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) */ case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - rdmsrl (MSR_VIA_BCR2, bcr2.val); - /* Enable software clock multiplier */ - bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index; - local_irq_disable(); - wrmsrl (MSR_VIA_BCR2, bcr2.val); - safe_halt(); - - /* Disable software clock multiplier */ - rdmsrl (MSR_VIA_BCR2, bcr2.val); - bcr2.bits.ESOFTBF = 0; - local_irq_disable(); - wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); + do_longhaul1(cx->address, clock_ratio_index); break; /* @@ -273,10 +258,22 @@ static void longhaul_setstate(unsigned int clock_ratio_index) * to work in practice. */ case TYPE_POWERSAVER: - do_powersaver(&longhaul, clock_ratio_index); + do_powersaver(cx->address, clock_ratio_index); break; } + /* Enable bus master arbitration */ + if (pr->flags.bm_check) { + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, + ACPI_MTX_DO_NOT_LOCK); + } + + outb(pic2_mask,0xA1); /* restore mask */ + outb(pic1_mask,0x21); + + local_irq_restore(flags); + preempt_enable(); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -527,6 +524,18 @@ static unsigned int longhaul_get(unsigned int cpu) return calc_speed(longhaul_get_cpu_mult()); } +acpi_status longhaul_walk_callback(acpi_handle obj_handle, + u32 nesting_level, + void *context, void **return_value) +{ + struct acpi_device *d; + + if ( acpi_bus_get_device(obj_handle, &d) ) { + return 0; + } + *return_value = (void *)acpi_driver_data(d); + return 1; +} static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { @@ -534,6 +543,15 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) char *cpuname=NULL; int ret; + /* Check ACPI support for C3 state */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, + &longhaul_walk_callback, NULL, (void *)&pr); + if (pr == NULL) goto err_acpi; + + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx == NULL || cx->latency > 1000) goto err_acpi; + + /* Now check what we have on this motherboard */ switch (c->x86_model) { case 6: cpu_model = CPU_SAMUEL; @@ -634,6 +652,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); return 0; + +err_acpi: + printk(KERN_ERR PFX "No ACPI support for CPU frequency changes.\n"); + return -ENODEV; } static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) From 48b7bde0f6d5fd08d046b583cfa0118ad74c6caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Tue, 4 Jul 2006 17:50:57 +0200 Subject: [PATCH 03/18] [CPUFREQ] Longhaul - Workaround issues with APIC. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to worry about local APIC. There is need to worry about I/O APIC, because I/O APIC is replacing good old 8259. According to Nehemiah datasheet VIA is using 3-wire bus to connect local APIC to I/O APIC. "[...] When IA32_APIC_BASE[11] is set to 0, processor APICs based on the 3-wire APIC bus cannot be generally re-enabled until a system hardware reset. The 3-wire bus looses track of arbitration that would be necessary for complete re-enabling. Certain (local) APIC functionality can be enabled. 
[...]" So we must set disable bit for each interrupt in I/O APIC registers. Same situation as for PIC - we must poke registers direcly. How to do this? I don't know. So at the moment it is better to fail. Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index d735cb460612..dfd243f497b2 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -688,6 +688,18 @@ static int __init longhaul_init(void) if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) return -ENODEV; +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + return -ENODEV; + printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + } +#endif +#ifdef CONFIG_X86_IO_APIC + if (cpu_has_apic) { + printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + return -ENODEV; + } +#endif switch (c->x86_model) { case 6 ... 9: return cpufreq_register_driver(&longhaul_driver); From 0d6daba5faed26a2f50a40adf5d4674a9a54717e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Fri, 7 Jul 2006 08:48:26 +0200 Subject: [PATCH 04/18] [CPUFREQ] Longhaul - Initialise later. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this longhaul will always fail when compiled into kernel, as it needs to initialise after the ACPI processor module. I lost this when I was splitting patches. Sorry. Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index dfd243f497b2..7388fc3239d8 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -733,6 +733,6 @@ MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); MODULE_LICENSE ("GPL"); -module_init(longhaul_init); +late_initcall(longhaul_init); module_exit(longhaul_exit); From 95a53249db330a3f08090611fdb5fe168a73e650 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 8 Jul 2006 22:20:26 +0200 Subject: [PATCH 05/18] [CPUFREQ] X86_GX_SUSPMOD must depend on PCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems commit 32ee8c3e470d86588b51dc42ed01e85c5fa0f180 accidentially reverted cdc9cc1d740ffc3d8d8207fbf5df9bf05fcc9955, IOW, it reintroduced the following compile error with CONFIG_PCI=n: <-- snip --> ... CC arch/i386/kernel/cpu/cpufreq/gx-suspmod.o arch/i386/kernel/cpu/cpufreq/gx-suspmod.c: In function ‘gx_detect_chipset’: arch/i386/kernel/cpu/cpufreq/gx-suspmod.c:193: error: implicit declaration of function ‘pci_match_id’ arch/i386/kernel/cpu/cpufreq/gx-suspmod.c:193: warning: comparison between pointer and integer make[3]: *** [arch/i386/kernel/cpu/cpufreq/gx-suspmod.o] Error 1 <-- snip --> This patch therefore re-adds the dependency of X86_GX_SUSPMOD on PCI. 
Signed-off-by: Adrian Bunk Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index c593d73908f7..ccc1edff5c97 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -96,6 +96,7 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. From c4a96c1eba206bd4a58a0f2acf2450126bd2b5da Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 9 Jul 2006 19:53:08 +0200 Subject: [PATCH 06/18] [CPUFREQ] Make longhaul_walk_callback() static This patch makes the needlessly global longhaul_walk_callback() static. Signed-off-by: Adrian Bunk Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 7388fc3239d8..3cfa0741863e 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -524,9 +524,9 @@ static unsigned int longhaul_get(unsigned int cpu) return calc_speed(longhaul_get_cpu_mult()); } -acpi_status longhaul_walk_callback(acpi_handle obj_handle, - u32 nesting_level, - void *context, void **return_value) +static acpi_status longhaul_walk_callback(acpi_handle obj_handle, + u32 nesting_level, + void *context, void **return_value) { struct acpi_device *d; From eb23c751d837848c87fda6b1347d194f6b333681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Sun, 9 Jul 2006 21:47:04 +0200 Subject: [PATCH 07/18] [CPUFREQ] Longhaul - Readd accidentally dropped line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I lost very important line in do_powersaver Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 3cfa0741863e..fdf8a6424f25 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -162,6 +162,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) longhaul.bits.RevisionKey = longhaul.bits.RevisionID; longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + longhaul.bits.EnableSoftBusRatio = 1; /* Sync to timer tick */ safe_halt(); From 9fb31c3a1d9f42e10e541ee0e2be8d1f27115141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Thu, 13 Jul 2006 19:26:10 +0200 Subject: [PATCH 08/18] [CPUFREQ] Longhaul - Fix power state test to do something more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is changing "always true" test to something usefull. 
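cx is assigned &pr->power.states[ACPI_STATE_C3] in longhaul_cpu_init(), i.e. the address of an element of the embedded states[] array, so it can never be NULL and the old test could not fail. Whether the BIOS actually exported a usable C3 state shows up in cx->address instead (sketch of old vs. new test):

	cx = &pr->power.states[ACPI_STATE_C3];	/* pointer into an array - never NULL */

	if (cx == NULL)				/* old test: always false */
		goto err_acpi;
	if (cx->address == 0 || cx->latency > 1000)	/* new test: no usable C3 state */
		goto err_acpi;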
Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index fdf8a6424f25..ccd1f2c39e18 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -550,7 +550,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) if (pr == NULL) goto err_acpi; cx = &pr->power.states[ACPI_STATE_C3]; - if (cx == NULL || cx->latency > 1000) goto err_acpi; + if (cx->address == 0 || cx->latency > 1000) goto err_acpi; /* Now check what we have on this motherboard */ switch (c->x86_model) { From 32deb2d5c4c291d7d9a73198dc357a151e4b978c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Sat, 15 Jul 2006 19:31:30 +0200 Subject: [PATCH 09/18] [CPUFREQ] Longhaul - Rename & fix multipliers table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This table is only used by Ezra-T CPUs currently, and has values for some other CPU. Fix them to match the values used by that CPU, and for now make it clearer by renaming the variable. Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index ccd1f2c39e18..4f2c3aeef724 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -322,9 +322,11 @@ static int guess_fsb(void) static int __init longhaul_get_ranges(void) { unsigned long invalue; - unsigned int multipliers[32]= { - 50,30,40,100,55,35,45,95,90,70,80,60,120,75,85,65, - -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 }; + unsigned int ezra_t_multipliers[32]= { + 90, 30, 40, 100, 55, 35, 45, 95, + 50, 70, 80, 60, 120, 75, 85, 65, + -1, 110, 120, -1, 135, 115, 125, 105, + 130, 150, 160, 140, -1, 155, -1, 145 }; unsigned int j, k = 0; union msr_longhaul longhaul; unsigned long lo, hi; @@ -353,13 +355,13 @@ static int __init longhaul_get_ranges(void) invalue = longhaul.bits.MaxMHzBR; if (longhaul.bits.MaxMHzBR4) invalue += 16; - maxmult=multipliers[invalue]; + maxmult=ezra_t_multipliers[invalue]; invalue = longhaul.bits.MinMHzBR; if (longhaul.bits.MinMHzBR4 == 1) minmult = 30; else - minmult = multipliers[invalue]; + minmult = ezra_t_multipliers[invalue]; fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; break; } From 3bcb09a35641f2840bd59d8f82154f830dca282c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 6 Jul 2006 12:30:26 -0700 Subject: [PATCH 10/18] [CPUFREQ] [1/2] add __find_governor helper and clean up some error handling. Adds a __find_governor() helper function to look up a governor by name. Also restructures some error handling to conform to the "single-exit" model which is generally preferred for kernel code. 
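For reference, the "single-exit" model referred to above looks roughly like this (generic sketch, "some_op" and "precondition" are placeholders, not code from this patch):

	static int some_op(void)
	{
		int err = -EINVAL;

		if (!precondition)
			goto out;

		/* ... do the work, updating err at each step ... */
		err = 0;
	out:
		/* one place for cleanup and the return */
		return err;
	}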
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 57 +++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ad996c772c8b..9b416372a8e4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -284,39 +284,52 @@ EXPORT_SYMBOL_GPL(cpufreq_notify_transition); * SYSFS INTERFACE * *********************************************************************/ +static struct cpufreq_governor *__find_governor(const char *str_governor) +{ + struct cpufreq_governor *t; + + list_for_each_entry(t, &cpufreq_governor_list, governor_list) + if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN)) + return t; + + return NULL; +} + /** * cpufreq_parse_governor - parse a governor string */ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy, struct cpufreq_governor **governor) { + int err = -EINVAL; + if (!cpufreq_driver) - return -EINVAL; + goto out; + if (cpufreq_driver->setpolicy) { if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_PERFORMANCE; - return 0; + err = 0; } else if (!strnicmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_POWERSAVE; - return 0; + err = 0; } - return -EINVAL; - } else { + } else if (cpufreq_driver->target) { struct cpufreq_governor *t; + mutex_lock(&cpufreq_governor_mutex); - if (!cpufreq_driver || !cpufreq_driver->target) - goto out; - list_for_each_entry(t, &cpufreq_governor_list, governor_list) { - if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN)) { - *governor = t; - mutex_unlock(&cpufreq_governor_mutex); - return 0; - } + + t = __find_governor(str_governor); + + if (t != NULL) { + *governor = t; + err = 0; } -out: + mutex_unlock(&cpufreq_governor_mutex); } - return -EINVAL; + out: + return err; } @@ -1265,23 +1278,21 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) int cpufreq_register_governor(struct cpufreq_governor *governor) { - struct cpufreq_governor *t; + int err; if (!governor) return -EINVAL; mutex_lock(&cpufreq_governor_mutex); - list_for_each_entry(t, &cpufreq_governor_list, governor_list) { - if (!strnicmp(governor->name,t->name,CPUFREQ_NAME_LEN)) { - mutex_unlock(&cpufreq_governor_mutex); - return -EBUSY; - } + err = -EBUSY; + if (__find_governor(governor->name) == NULL) { + err = 0; + list_add(&governor->governor_list, &cpufreq_governor_list); } - list_add(&governor->governor_list, &cpufreq_governor_list); mutex_unlock(&cpufreq_governor_mutex); - return 0; + return err; } EXPORT_SYMBOL_GPL(cpufreq_register_governor); From ea71497020c55cd39221e0abad5c1752ac6e3f47 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 6 Jul 2006 12:32:01 -0700 Subject: [PATCH 11/18] [CPUFREQ] [2/2] demand load governor modules. Demand-load cpufreq governor modules if needed. 
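If a governor name written via sysfs is not registered yet, cpufreq_parse_governor() now tries to load a module named "cpufreq_<governor>" and repeats the lookup. Condensed sketch (locking omitted - the real code below drops cpufreq_governor_mutex around request_module()):

	struct cpufreq_governor *t = __find_governor(str_governor);

	if (t == NULL) {
		char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", str_governor);

		if (name && request_module(name) == 0)
			t = __find_governor(str_governor);
		kfree(name);
	}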
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9b416372a8e4..b3df613ae4ec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -321,6 +321,23 @@ static int cpufreq_parse_governor (char *str_governor, unsigned int *policy, t = __find_governor(str_governor); + if (t == NULL) { + char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", str_governor); + + if (name) { + int ret; + + mutex_unlock(&cpufreq_governor_mutex); + ret = request_module(name); + mutex_lock(&cpufreq_governor_mutex); + + if (ret == 0) + t = __find_governor(str_governor); + } + + kfree(name); + } + if (t != NULL) { *governor = t; err = 0; From 12e704db809cd4101b7d3594fc9a96f30fe88a31 Mon Sep 17 00:00:00 2001 From: bert hubert Date: Sun, 30 Jul 2006 21:19:32 +0200 Subject: [PATCH 12/18] [CPUFREQ] Propagate acpi_processor_preregister_performance return value. Note how any error from acpi_processor_preregister_performance is ignored. From: bert hubert Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 567b39bea07e..efb41e81351c 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -384,8 +384,7 @@ static int acpi_cpufreq_early_init_acpi(void) } /* Do initialization in ACPI core */ - acpi_processor_preregister_performance(acpi_perf_data); - return 0; + return acpi_processor_preregister_performance(acpi_perf_data); } static int From ce510193272c295b891e45525a83b543ae3207c1 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Mon, 24 Jul 2006 16:30:00 -0700 Subject: [PATCH 13/18] NFS: Release dcache_lock in an error path of nfs_path In one of the error paths of nfs_path, it may return with dcache_lock still held; fix this by adding and using a new error path Elong_unlock which unlocks dcache_lock. Signed-off-by: Josh Triplett Signed-off-by: Trond Myklebust (cherry picked from f4b90b43677fb23297c56802c3056fc304f988d9 commit) --- fs/nfs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 19b98ca468eb..86b3169c8cac 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -51,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry, namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) - goto Elong; + goto Elong_unlock; end -= namelen; memcpy(end, dentry->d_name.name, namelen); *--end = '/'; @@ -68,6 +68,8 @@ char *nfs_path(const char *base, const struct dentry *dentry, end -= namelen; memcpy(end, base, namelen); return end; +Elong_unlock: + spin_unlock(&dcache_lock); Elong: return ERR_PTR(-ENAMETOOLONG); } From e4e20512cfe0bacec0764b4925889d1fa94644f9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 3 Aug 2006 15:07:47 -0400 Subject: [PATCH 14/18] NFS: make 2 functions static nfs_writedata_free() and nfs_readdata_free() can now become static. 
Signed-off-by: Adrian Bunk Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust (cherry picked from 5e1ce40f0c3c8f67591aff17756930d7a18ceb1a commit) --- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_fs.h | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 52bf634260a1..65c0c5b32351 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -63,7 +63,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -void nfs_readdata_free(struct nfs_read_data *p) +static void nfs_readdata_free(struct nfs_read_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 86bac6a5008e..50774991f8d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -137,7 +137,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -void nfs_writedata_free(struct nfs_write_data *p) +static void nfs_writedata_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55ea853d57bc..247434553ae8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -476,10 +476,9 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) } /* - * Allocate and free nfs_write_data structures + * Allocate nfs_write_data structures */ extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount); -extern void nfs_writedata_free(struct nfs_write_data *p); /* * linux/fs/nfs/read.c @@ -491,10 +490,9 @@ extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); /* - * Allocate and free nfs_read_data structures + * Allocate nfs_read_data structures */ extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount); -extern void nfs_readdata_free(struct nfs_read_data *p); /* * linux/fs/nfs3proc.c From f3d43c769d14b7065da7f62ec468b1fcb8cd6e06 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 3 Aug 2006 15:07:47 -0400 Subject: [PATCH 15/18] NLM/lockd: remove b_done We never actually set the b_done field any more; it's always zero. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust (cherry picked from af8412d4283ef91356e65e0ed9b025b376aebded commit) --- fs/lockd/svclock.c | 12 +++--------- include/linux/lockd/lockd.h | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index baf5ae513481..c9d419703cf3 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -638,9 +638,6 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) if (task->tk_status < 0) { /* RPC error: Re-insert for retransmission */ timeout = 10 * HZ; - } else if (block->b_done) { - /* Block already removed, kill it for real */ - timeout = 0; } else { /* Call was successful, now wait for client callback */ timeout = 60 * HZ; @@ -709,13 +706,10 @@ nlmsvc_retry_blocked(void) break; if (time_after(block->b_when,jiffies)) break; - dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", - block, block->b_when, block->b_done); + dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", + block, block->b_when); kref_get(&block->b_count); - if (block->b_done) - nlmsvc_unlink_block(block); - else - nlmsvc_grant_blocked(block); + nlmsvc_grant_blocked(block); nlmsvc_release_block(block); } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index aa4fe905bb4d..0d92c468d55a 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -123,7 +123,6 @@ struct nlm_block { unsigned int b_id; /* block id */ unsigned char b_queued; /* re-queued */ unsigned char b_granted; /* VFS granted lock */ - unsigned char b_done; /* callback complete */ struct nlm_file * b_file; /* file in question */ }; From e0ab53deaa91293a7958d63d5a2cf4c5645ad6f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Jul 2006 17:22:50 -0400 Subject: [PATCH 16/18] RPC: Ensure that we disconnect TCP socket when client requests error out If we're part way through transmitting a TCP request, and the client errors, then we need to disconnect and reconnect the TCP socket in order to avoid confusing the server. Signed-off-by: Trond Myklebust (cherry picked from 031a50c8b9ea82616abd4a4e18021a25848941ce commit) --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/clnt.c | 52 +++++++++++++++++++++---------------- net/sunrpc/xprt.c | 21 +++------------ net/sunrpc/xprtsock.c | 29 ++++++++++++++++++++- 4 files changed, 61 insertions(+), 43 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e8bbe8118de8..840e47a4ccc5 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -229,7 +229,7 @@ int xprt_reserve_xprt(struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); -void xprt_abort_transmit(struct rpc_task *task); +void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4ba271f892c8..d6409e757219 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -921,26 +921,43 @@ call_transmit(struct rpc_task *task) task->tk_status = xprt_prepare_transmit(task); if (task->tk_status != 0) return; + task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. 
*/ if (rpc_task_need_encode(task)) { - task->tk_rqstp->rq_bytes_sent = 0; + BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); call_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) - goto out_nosend; + return; } - task->tk_action = call_transmit_status; xprt_transmit(task); if (task->tk_status < 0) return; - if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); - } - return; -out_nosend: - /* release socket write lock before attempting to handle error */ - xprt_abort_transmit(task); + /* + * On success, ensure that we call xprt_end_transmit() before sleeping + * in order to allow access to the socket to other RPC requests. + */ + call_transmit_status(task); + if (task->tk_msg.rpc_proc->p_decode != NULL) + return; + task->tk_action = rpc_exit_task; + rpc_wake_up_task(task); +} + +/* + * 5a. Handle cleanup after a transmission + */ +static void +call_transmit_status(struct rpc_task *task) +{ + task->tk_action = call_status; + /* + * Special case: if we've been waiting on the socket's write_space() + * callback, then don't call xprt_end_transmit(). + */ + if (task->tk_status == -EAGAIN) + return; + xprt_end_transmit(task); rpc_task_force_reencode(task); } @@ -992,18 +1009,7 @@ call_status(struct rpc_task *task) } /* - * 6a. Handle transmission errors. - */ -static void -call_transmit_status(struct rpc_task *task) -{ - if (task->tk_status != -EAGAIN) - rpc_task_force_reencode(task); - call_status(task); -} - -/* - * 6b. Handle RPC timeout + * 6a. Handle RPC timeout * We do not release the request slot, so we keep using the * same XID for all retransmits. */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 313b68d892c6..e8c2bc4977f3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -707,12 +707,9 @@ out_unlock: return err; } -void -xprt_abort_transmit(struct rpc_task *task) +void xprt_end_transmit(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; - - xprt_release_write(xprt, task); + xprt_release_write(task->tk_xprt, task); } /** @@ -761,8 +758,6 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = -ENOTCONN; else if (!req->rq_received) rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - - xprt->ops->release_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return; } @@ -772,18 +767,8 @@ void xprt_transmit(struct rpc_task *task) * schedq, and being picked up by a parallel run of rpciod(). */ task->tk_status = status; - - switch (status) { - case -ECONNREFUSED: + if (status == -ECONNREFUSED) rpc_sleep_on(&xprt->sending, task, NULL, NULL); - case -EAGAIN: - case -ENOTCONN: - return; - default: - break; - } - xprt_release_write(xprt, task); - return; } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee678ed13b6f..441bd53f5eca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -413,6 +413,33 @@ static int xs_tcp_send_request(struct rpc_task *task) return status; } +/** + * xs_tcp_release_xprt - clean up after a tcp transmission + * @xprt: transport + * @task: rpc task + * + * This cleans up if an error causes us to abort the transmission of a request. + * In this case, the socket may need to be reset in order to avoid confusing + * the server. 
+ */ +static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (task != xprt->snd_task) + return; + if (task == NULL) + goto out_release; + req = task->tk_rqstp; + if (req->rq_bytes_sent == 0) + goto out_release; + if (req->rq_bytes_sent == req->rq_snd_buf.len) + goto out_release; + set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); +out_release: + xprt_release_xprt(xprt, task); +} + /** * xs_close - close a socket * @xprt: transport @@ -1250,7 +1277,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, - .release_xprt = xprt_release_xprt, + .release_xprt = xs_tcp_release_xprt, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, From 5c3e985a2c1908aa97221d3806f85ce7e2fbfa88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Jul 2006 17:37:40 -0400 Subject: [PATCH 17/18] SUNRPC: Fix obvious refcounting bugs in rpc_pipefs. Doh! Signed-off-by: Trond Myklebust (cherry picked from 496f408f2f0e7ee5481a7c2222189be6c4f5aa6c commit) --- net/sunrpc/rpc_pipe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index dc6cb93c8830..a3bd2db2e024 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -667,10 +667,11 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) RPCAUTH_info, RPCAUTH_EOF); if (error) goto err_depopulate; + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_depopulate: rpc_depopulate(dentry); __rpc_rmdir(dir, dentry); @@ -731,10 +732,11 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) rpci->flags = flags; rpci->ops = ops; inode_dir_notify(dir, DN_CREATE); + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); From eca7994f60eb6550d9e9b36d3b641a5a0e18a7c1 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 28 Jun 2006 13:50:09 -0500 Subject: [PATCH 18/18] [IA64] make uncached allocator more node aware The uncached allocator has a function, uncached_get_new_chunk(), that needs to be serialized on a per node basis. It also has a global variable, allocated_granules, which should be defined on a per node basis and protected by that serialization. Additionally, all error returns from functions called (like ia64_pal_mc_drain()) should be handled appropriately. 
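The per-node state and the serialize-then-recheck pattern, condensed from the patch below:

	struct uncached_pool {
		struct gen_pool *pool;
		struct mutex add_chunk_mutex;	/* serialize adding a converted chunk */
		int nchunks_added;		/* #of converted chunks added to pool */
		atomic_t status;		/* smp called function's return status */
	};

	/* in uncached_add_chunk(): */
	if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
		return -1;			/* interrupted by a signal */
	if (uc_pool->nchunks_added > nchunks_added) {
		/* someone else added a chunk while we waited for the mutex */
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return 0;
	}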
Signed-off-by: Dean Nelson Acked-by: Jes Sorenson Signed-off-by: Tony Luck --- arch/ia64/kernel/uncached.c | 86 ++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 29 deletions(-) diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 5f03b9e524dd..4c73a6763669 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -32,32 +32,38 @@ extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *); -#define MAX_UNCACHED_GRANULES 5 -static int allocated_granules; +struct uncached_pool { + struct gen_pool *pool; + struct mutex add_chunk_mutex; /* serialize adding a converted chunk */ + int nchunks_added; /* #of converted chunks added to pool */ + atomic_t status; /* smp called function's return status*/ +}; -struct gen_pool *uncached_pool[MAX_NUMNODES]; +#define MAX_CONVERTED_CHUNKS_PER_NODE 2 + +struct uncached_pool uncached_pools[MAX_NUMNODES]; static void uncached_ipi_visibility(void *data) { int status; + struct uncached_pool *uc_pool = (struct uncached_pool *)data; status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); if ((status != PAL_VISIBILITY_OK) && (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) - printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on " - "CPU %i\n", status, raw_smp_processor_id()); + atomic_inc(&uc_pool->status); } static void uncached_ipi_mc_drain(void *data) { int status; + struct uncached_pool *uc_pool = (struct uncached_pool *)data; status = ia64_pal_mc_drain(); - if (status) - printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on " - "CPU %i\n", status, raw_smp_processor_id()); + if (status != PAL_STATUS_SUCCESS) + atomic_inc(&uc_pool->status); } @@ -70,21 +76,34 @@ static void uncached_ipi_mc_drain(void *data) * This is accomplished by first allocating a granule of cached memory pages * and then converting them to uncached memory pages. 
*/ -static int uncached_add_chunk(struct gen_pool *pool, int nid) +static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) { struct page *page; - int status, i; + int status, i, nchunks_added = uc_pool->nchunks_added; unsigned long c_addr, uc_addr; - if (allocated_granules >= MAX_UNCACHED_GRANULES) + if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0) + return -1; /* interrupted by a signal */ + + if (uc_pool->nchunks_added > nchunks_added) { + /* someone added a new chunk while we were waiting */ + mutex_unlock(&uc_pool->add_chunk_mutex); + return 0; + } + + if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) { + mutex_unlock(&uc_pool->add_chunk_mutex); return -1; + } /* attempt to allocate a granule's worth of cached memory pages */ page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, IA64_GRANULE_SHIFT-PAGE_SHIFT); - if (!page) + if (!page) { + mutex_unlock(&uc_pool->add_chunk_mutex); return -1; + } /* convert the memory pages from cached to uncached */ @@ -102,11 +121,14 @@ static int uncached_add_chunk(struct gen_pool *pool, int nid) flush_tlb_kernel_range(uc_addr, uc_adddr + IA64_GRANULE_SIZE); status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); - if (!status) { - status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1); - if (status) + if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { + atomic_set(&uc_pool->status, 0); + status = smp_call_function(uncached_ipi_visibility, uc_pool, + 0, 1); + if (status || atomic_read(&uc_pool->status)) goto failed; - } + } else if (status != PAL_VISIBILITY_OK) + goto failed; preempt_disable(); @@ -120,20 +142,24 @@ static int uncached_add_chunk(struct gen_pool *pool, int nid) preempt_enable(); - ia64_pal_mc_drain(); - status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1); - if (status) + status = ia64_pal_mc_drain(); + if (status != PAL_STATUS_SUCCESS) + goto failed; + atomic_set(&uc_pool->status, 0); + status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 0, 1); + if (status || atomic_read(&uc_pool->status)) goto failed; /* * The chunk of memory pages has been converted to uncached so now we * can add it to the pool. 
*/ - status = gen_pool_add(pool, uc_addr, IA64_GRANULE_SIZE, nid); + status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid); if (status) goto failed; - allocated_granules++; + uc_pool->nchunks_added++; + mutex_unlock(&uc_pool->add_chunk_mutex); return 0; /* failed to convert or add the chunk so give it back to the kernel */ @@ -142,6 +168,7 @@ failed: ClearPageUncached(&page[i]); free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT); + mutex_unlock(&uc_pool->add_chunk_mutex); return -1; } @@ -158,7 +185,7 @@ failed: unsigned long uncached_alloc_page(int starting_nid) { unsigned long uc_addr; - struct gen_pool *pool; + struct uncached_pool *uc_pool; int nid; if (unlikely(starting_nid >= MAX_NUMNODES)) @@ -171,14 +198,14 @@ unsigned long uncached_alloc_page(int starting_nid) do { if (!node_online(nid)) continue; - pool = uncached_pool[nid]; - if (pool == NULL) + uc_pool = &uncached_pools[nid]; + if (uc_pool->pool == NULL) continue; do { - uc_addr = gen_pool_alloc(pool, PAGE_SIZE); + uc_addr = gen_pool_alloc(uc_pool->pool, PAGE_SIZE); if (uc_addr != 0) return uc_addr; - } while (uncached_add_chunk(pool, nid) == 0); + } while (uncached_add_chunk(uc_pool, nid) == 0); } while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid); @@ -197,7 +224,7 @@ EXPORT_SYMBOL(uncached_alloc_page); void uncached_free_page(unsigned long uc_addr) { int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); - struct gen_pool *pool = uncached_pool[nid]; + struct gen_pool *pool = uncached_pools[nid].pool; if (unlikely(pool == NULL)) return; @@ -224,7 +251,7 @@ static int __init uncached_build_memmap(unsigned long uc_start, unsigned long uc_end, void *arg) { int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET); - struct gen_pool *pool = uncached_pool[nid]; + struct gen_pool *pool = uncached_pools[nid].pool; size_t size = uc_end - uc_start; touch_softlockup_watchdog(); @@ -242,7 +269,8 @@ static int __init uncached_init(void) int nid; for_each_online_node(nid) { - uncached_pool[nid] = gen_pool_create(PAGE_SHIFT, nid); + uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid); + mutex_init(&uncached_pools[nid].add_chunk_mutex); } efi_memmap_walk_uc(uncached_build_memmap, NULL);