From cd878479792cc1e4bc9d62ed0ef2c4454743848c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 11 Aug 2006 17:59:28 -0400 Subject: [PATCH 01/11] [CPUFREQ] Fix typo. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b3df613ae4ec..d35a9f06ab7b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -32,7 +32,7 @@ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg) /** - * The "cpufreq driver" - the arch- or hardware-dependend low + * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock * also protects the cpufreq_cpu_data array. */ From 1ce28d6b19112a7c76af8e971e2de3109d19a943 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 31 Jul 2006 22:25:20 +0400 Subject: [PATCH 02/11] [CPUFREQ][1/2] ondemand: updated tune for hardware coordination Try to make dbs_check_cpu() call on all CPUs at the same jiffy. This will help when multiple cores share P-states via Hardware Coordination. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Alexey Starikovskiy Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 52cf1f021825..f507a869acbc 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -305,6 +305,9 @@ static void do_dbs_timer(void *data) { unsigned int cpu = smp_processor_id(); struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; if (!dbs_info->enable) return; @@ -312,18 +315,18 @@ static void do_dbs_timer(void *data) lock_cpu_hotplug(); dbs_check_cpu(dbs_info); unlock_cpu_hotplug(); - queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, - usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); + queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } static inline void dbs_timer_init(unsigned int cpu) { struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; INIT_WORK(&dbs_info->work, do_dbs_timer, 0); - queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, - usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); - return; + queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) From 05ca0350e8caa91a5ec9961c585c98005b6934ea Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 31 Jul 2006 22:28:12 +0400 Subject: [PATCH 03/11] [CPUFREQ][2/2] ondemand: updated add powersave_bias tunable ondemand selects the minimum frequency that can retire a workload with negligible idle time -- ideally resulting in the highest performance/power efficiency with negligible performance impact. But on some systems and some workloads, this algorithm is more performance biased than necessary, and de-tuning it a bit to allow some performance impact can save measurable power. This patch adds a "powersave_bias" tunable to ondemand to allow it to reduce its target frequency by a specified percent. By default, the powersave_bias is 0 and has no effect. powersave_bias is in units of 0.1%, so it has an effective range of 1 through 1000, resulting in 0.1% to 100% impact. In practice, users will not be able to detect a difference between 0.1% increments, but 1.0% increments turned out to be too large. Also, the max value of 1000 (100%) would simply peg the system in its deepest power saving P-state, unless the processor really has a hardware P-state at 0Hz:-) For example, If ondemand requests 2.0GHz based on utilization, and powersave_bias=100, this code will knock 10% off the target and seek a target of 1.8GHz instead of 2.0GHz until the next sampling. If 1.8 is an exact match with an hardware frequency we use it, otherwise we average our time between the frequency next higher than 1.8 and next lower than 1.8. Note that a user or administrative program can change powersave_bias at run-time depending on how they expect the system to be used. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Alexey Starikovskiy Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 155 ++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index f507a869acbc..34874c2f1885 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -55,6 +55,10 @@ struct cpu_dbs_info_s { struct cpufreq_policy *cur_policy; struct work_struct work; unsigned int enable; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -72,15 +76,15 @@ static DEFINE_MUTEX(dbs_mutex); static struct workqueue_struct *kondemand_wq; -struct dbs_tuners { +static struct dbs_tuners { unsigned int sampling_rate; unsigned int up_threshold; unsigned int ignore_nice; -}; - -static struct dbs_tuners dbs_tuners_ins = { + unsigned int powersave_bias; +} dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .ignore_nice = 0, + .powersave_bias = 0, }; static inline cputime64_t get_cpu_idle_time(unsigned int cpu) @@ -96,6 +100,69 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu) return retval; } +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + */ +unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu); + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void ondemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i); + dbs_info->freq_table = cpufreq_frequency_get_table(i); + dbs_info->freq_lo = 0; + } +} + /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { @@ -124,6 +191,7 @@ static ssize_t show_##file_name \ show_one(sampling_rate, sampling_rate); show_one(up_threshold, up_threshold); show_one(ignore_nice_load, ignore_nice); +show_one(powersave_bias, powersave_bias); static ssize_t store_sampling_rate(struct cpufreq_policy *unused, const char *buf, size_t count) @@ -198,6 +266,27 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, return count; } +static ssize_t store_powersave_bias(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.powersave_bias = input; + ondemand_powersave_bias_init(); + mutex_unlock(&dbs_mutex); + + return count; +} + #define define_one_rw(_name) \ static struct freq_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) @@ -205,6 +294,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); define_one_rw(up_threshold); define_one_rw(ignore_nice_load); +define_one_rw(powersave_bias); static struct attribute * dbs_attributes[] = { &sampling_rate_max.attr, @@ -212,6 +302,7 @@ static struct attribute * dbs_attributes[] = { &sampling_rate.attr, &up_threshold.attr, &ignore_nice_load.attr, + &powersave_bias.attr, NULL }; @@ -234,6 +325,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) if (!this_dbs_info->enable) return; + this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); total_ticks = (unsigned int) cputime64_sub(cur_jiffies, @@ -274,11 +366,18 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Check for frequency increase */ if (load > dbs_tuners_ins.up_threshold) { /* if we are already at full speed then break out early */ - if (policy->cur == policy->max) - return; + if (!dbs_tuners_ins.powersave_bias) { + if (policy->cur == policy->max) + return; - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + } else { + int freq = powersave_bias_target(policy, policy->max, + CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } return; } @@ -293,14 +392,23 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * policy. To be safe, we focus 10 points under the threshold. */ if (load < (dbs_tuners_ins.up_threshold - 10)) { - unsigned int freq_next; - freq_next = (policy->cur * load) / + unsigned int freq_next = (policy->cur * load) / (dbs_tuners_ins.up_threshold - 10); - - __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); + if (!dbs_tuners_ins.powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } } } +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + static void do_dbs_timer(void *data) { unsigned int cpu = smp_processor_id(); @@ -311,10 +419,24 @@ static void do_dbs_timer(void *data) if (!dbs_info->enable) return; - - lock_cpu_hotplug(); - dbs_check_cpu(dbs_info); - unlock_cpu_hotplug(); + /* Common NORMAL_SAMPLE setup */ + INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE); + if (!dbs_tuners_ins.powersave_bias || + (unsigned long) data == DBS_NORMAL_SAMPLE) { + lock_cpu_hotplug(); + dbs_check_cpu(dbs_info); + unlock_cpu_hotplug(); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + INIT_WORK(&dbs_info->work, do_dbs_timer, + (void *)DBS_SUB_SAMPLE); + delay = dbs_info->freq_hi_jiffies; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, + CPUFREQ_RELATION_H); + } queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } @@ -325,6 +447,7 @@ static inline void dbs_timer_init(unsigned int cpu) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; + ondemand_powersave_bias_init(); INIT_WORK(&dbs_info->work, do_dbs_timer, 0); queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } From 179da8e6e8903a8cdb19bb12672d50dc33f0fde6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Tue, 8 Aug 2006 19:12:20 +0200 Subject: [PATCH 04/11] [CPUFREQ] Longhaul - Disable arbiter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI C3 works for "Powersaver" processors, so use it only for them. Older CPU will change frequency on "halt" only. But we can protect transition in two ways: - by ACPI PM2 register, there is "bus master arbiter disable" bit. This isn't tested because VIA mainboards don't have PM2 register, - by PLE133 PCI/AGP arbiter disable register. There are two bits in this register. First is "PCI arbiter disable", second "AGP arbiter disable". This is working on VIA Epia 800 mainboards. Test on bm_control is more proper because this is true when PM2 register exist. Signed-off-by: Rafał Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 86 ++++++++++++++++++------- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 4f2c3aeef724..83a8793f1db8 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,7 @@ static int can_scale_voltage; static int vrmrev; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; +static int port22_en = 0; /* Module parameters */ static int dont_scale_voltage; @@ -124,10 +126,9 @@ static int longhaul_get_cpu_mult(void) /* For processor with BCR2 MSR */ -static void do_longhaul1(int cx_address, unsigned int clock_ratio_index) +static void do_longhaul1(unsigned int clock_ratio_index) { union msr_bcr2 bcr2; - u32 t; rdmsrl(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ @@ -136,13 +137,11 @@ static void do_longhaul1(int cx_address, unsigned int clock_ratio_index) /* Sync to timer tick */ safe_halt(); - ACPI_FLUSH_CPU_CACHE(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_BCR2, bcr2.val); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 read */ - t = inl(acpi_fadt.xpm_tmr_blk.address); + /* Invoke transition */ + ACPI_FLUSH_CPU_CACHE(); + halt(); /* Disable software clock multiplier */ local_irq_disable(); @@ -166,9 +165,9 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) /* Sync to timer tick */ safe_halt(); - ACPI_FLUSH_CPU_CACHE(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + ACPI_FLUSH_CPU_CACHE(); /* Invoke C3 */ inb(cx_address); /* Dummy op - must do something useless after P_LVL3 read */ @@ -227,10 +226,13 @@ static void longhaul_setstate(unsigned int clock_ratio_index) outb(0xFF,0xA1); /* Overkill */ outb(0xFE,0x21); /* TMR0 only */ - /* Disable bus master arbitration */ - if (pr->flags.bm_check) { + if (pr->flags.bm_control) { + /* Disable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); + } else if (port22_en) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); } switch (longhaul_version) { @@ -244,7 +246,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) */ case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - do_longhaul1(cx->address, clock_ratio_index); + do_longhaul1(clock_ratio_index); break; /* @@ -259,14 +261,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index) * to work in practice. */ case TYPE_POWERSAVER: + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, + ACPI_MTX_DO_NOT_LOCK); do_powersaver(cx->address, clock_ratio_index); break; } - /* Enable bus master arbitration */ - if (pr->flags.bm_check) { + if (pr->flags.bm_control) { + /* Enable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK); + } else if (port22_en) { + /* Enable arbiters */ + outb(0, 0x22); } outb(pic2_mask,0xA1); /* restore mask */ @@ -540,21 +548,33 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, return 1; } +/* VIA don't support PM2 reg, but have something similar */ +static int enable_arbiter_disable(void) +{ + struct pci_dev *dev; + u8 pci_cmd; + + /* Find PLE133 host bridge */ + dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL); + if (dev != NULL) { + /* Enable access to port 0x22 */ + pci_read_config_byte(dev, 0x78, &pci_cmd); + if ( !(pci_cmd & 1<<7) ) { + pci_cmd |= 1<<7; + pci_write_config_byte(dev, 0x78, pci_cmd); + } + return 1; + } + return 0; +} + static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *c = cpu_data; char *cpuname=NULL; int ret; - /* Check ACPI support for C3 state */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - &longhaul_walk_callback, NULL, (void *)&pr); - if (pr == NULL) goto err_acpi; - - cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address == 0 || cx->latency > 1000) goto err_acpi; - - /* Now check what we have on this motherboard */ + /* Check what we have on this motherboard */ switch (c->x86_model) { case 6: cpu_model = CPU_SAMUEL; @@ -636,6 +656,28 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) break; }; + /* Find ACPI data for processor */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, + &longhaul_walk_callback, NULL, (void *)&pr); + if (pr == NULL) + goto err_acpi; + + if (longhaul_version == TYPE_POWERSAVER) { + /* Check ACPI support for C3 state */ + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx->address == 0 || cx->latency > 1000) + goto err_acpi; + } else { + /* Check ACPI support for bus master arbiter disable */ + if (!pr->flags.bm_control) { + if (!enable_arbiter_disable()) { + printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n"); + return -ENODEV; + } else + port22_en = 1; + } + } + ret = longhaul_get_ranges(); if (ret != 0) return ret; From 6595413fc9453a211f4b5d5cc42f0bbf3daa615b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Sun, 13 Aug 2006 09:16:20 +0200 Subject: [PATCH 05/11] [CPUFREQ] Longhaul - Add ignore_latency option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some laptops with VIA C3 processor, CLE266 chipset and AMI BIOS have incorrect latency values in FADT table. These laptops seems to be C3 capable, but latency values are to big: 101 for C2 and 1017 for C3. This option will allow user to skip C3 latency test but not C3 address test. AMI BIOS is setting C3 address to correct value in DSDT table. Signed-off-by: Rafał Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 83a8793f1db8..43bbf948d45d 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -65,7 +65,7 @@ static int port22_en = 0; /* Module parameters */ static int dont_scale_voltage; - +static int ignore_latency = 0; #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) @@ -665,8 +665,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) if (longhaul_version == TYPE_POWERSAVER) { /* Check ACPI support for C3 state */ cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address == 0 || cx->latency > 1000) + if (cx->address == 0 || + (cx->latency > 1000 && ignore_latency == 0) ) goto err_acpi; + } else { /* Check ACPI support for bus master arbiter disable */ if (!pr->flags.bm_control) { @@ -773,6 +775,8 @@ static void __exit longhaul_exit(void) module_param (dont_scale_voltage, int, 0644); MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor"); +module_param(ignore_latency, int, 0644); +MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test"); MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); @@ -780,4 +784,3 @@ MODULE_LICENSE ("GPL"); late_initcall(longhaul_init); module_exit(longhaul_exit); - From b5ecf60fe6b18de0bc59d336d444835d4ef835ed Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 13 Aug 2006 23:00:08 +0200 Subject: [PATCH 06/11] [CPUFREQ] make drivers/cpufreq/cpufreq_ondemand.c:powersave_bias_target() static This patch makes the needlessly global powersave_bias_target() static. Signed-off-by: Adrian Bunk Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 34874c2f1885..5ca2fd5d1ed1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -105,8 +105,9 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu) * Returns the freq_hi to be used right now and will set freq_hi_jiffies, * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. */ -unsigned int powersave_bias_target(struct cpufreq_policy *policy, - unsigned int freq_next, unsigned int relation) +static unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, + unsigned int relation) { unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; From 3906f4edeef976c081c4e7bd92164d2f59c325ae Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 5 Sep 2006 17:15:47 -0400 Subject: [PATCH 07/11] [CPUFREQ] Fix sparse warning in ondemand drivers/cpufreq/cpufreq_ondemand.c:323:2: warning: Using plain integer as NULL pointer Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 5ca2fd5d1ed1..bf8aa45d4f01 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -449,7 +449,7 @@ static inline void dbs_timer_init(unsigned int cpu) delay -= jiffies % delay; ondemand_powersave_bias_init(); - INIT_WORK(&dbs_info->work, do_dbs_timer, 0); + INIT_WORK(&dbs_info->work, do_dbs_timer, NULL); queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } From db44aaf3a2f599163c53ce96658aca688b3466f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Wed, 16 Aug 2006 01:07:33 +0200 Subject: [PATCH 08/11] [CPUFREQ] Longhaul - Add voltage scaling to driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename option "dont_scale_voltage" to "scale_voltage" because don't will be default. Use "pos" for calculating voltage. In this way driver don't need to know mV value or low level value. Simply min U is one pos and max U is second pos. All pos between these two are used. Assume that min U is for min f and max U for max f. For frequency between min and max calculate pos based on difference between current frequency and min f. Values in mobile VRM table changed to values from C3-M datasheet. Signed-off-by: Rafał Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 97 ++++++++++++++----------- arch/i386/kernel/cpu/cpufreq/longhaul.h | 48 +++++++++--- 2 files changed, 94 insertions(+), 51 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 43bbf948d45d..f5cc9f5c9bab 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -53,19 +53,26 @@ #define CPU_NEHEMIAH 5 static int cpu_model; -static unsigned int numscales=16, numvscales; +static unsigned int numscales=16; static unsigned int fsb; -static int minvid, maxvid; + +static struct mV_pos *vrm_mV_table; +static unsigned char *mV_vrm_table; +struct f_msr { + unsigned char vrm; +}; +static struct f_msr f_msr_table[32]; + +static unsigned int highest_speed, lowest_speed; /* kHz */ static unsigned int minmult, maxmult; static int can_scale_voltage; -static int vrmrev; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; -static int port22_en = 0; +static int port22_en; /* Module parameters */ -static int dont_scale_voltage; -static int ignore_latency = 0; +static int scale_voltage; +static int ignore_latency; #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) @@ -73,7 +80,6 @@ static int ignore_latency = 0; /* Clock ratios multiplied by 10 */ static int clock_ratio[32]; static int eblcr_table[32]; -static int voltage_table[32]; static unsigned int highest_speed, lowest_speed; /* kHz */ static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; @@ -163,6 +169,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; longhaul.bits.EnableSoftBusRatio = 1; + if (can_scale_voltage) { + longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; + longhaul.bits.EnableSoftVID = 1; + } + /* Sync to timer tick */ safe_halt(); /* Change frequency on next halt or sleep */ @@ -454,53 +465,57 @@ static int __init longhaul_get_ranges(void) static void __init longhaul_setup_voltagescaling(void) { union msr_longhaul longhaul; + struct mV_pos minvid, maxvid; + unsigned int j, speed, pos, kHz_step, numvscales; - rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); - - if (!(longhaul.bits.RevisionID & 1)) + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!(longhaul.bits.RevisionID & 1)) { + printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); return; + } - minvid = longhaul.bits.MinimumVID; - maxvid = longhaul.bits.MaximumVID; - vrmrev = longhaul.bits.VRMRev; + if (!longhaul.bits.VRMRev) { + printk (KERN_INFO PFX "VRM 8.5\n"); + vrm_mV_table = &vrm85_mV[0]; + mV_vrm_table = &mV_vrm85[0]; + } else { + printk (KERN_INFO PFX "Mobile VRM\n"); + vrm_mV_table = &mobilevrm_mV[0]; + mV_vrm_table = &mV_mobilevrm[0]; + } - if (minvid == 0 || maxvid == 0) { + minvid = vrm_mV_table[longhaul.bits.MinimumVID]; + maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; + numvscales = maxvid.pos - minvid.pos + 1; + kHz_step = (highest_speed - lowest_speed) / numvscales; + + if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " "Voltage scaling disabled.\n", - minvid/1000, minvid%1000, maxvid/1000, maxvid%1000); + minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); return; } - if (minvid == maxvid) { + if (minvid.mV == maxvid.mV) { printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " "both %d.%03d. Voltage scaling disabled\n", - maxvid/1000, maxvid%1000); + maxvid.mV/1000, maxvid.mV%1000); return; } - if (vrmrev==0) { - dprintk ("VRM 8.5\n"); - memcpy (voltage_table, vrm85scales, sizeof(voltage_table)); - numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25; - } else { - dprintk ("Mobile VRM\n"); - memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table)); - numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5; + printk(KERN_INFO PFX "Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n", + maxvid.mV/1000, maxvid.mV%1000, + minvid.mV/1000, minvid.mV%1000, + numvscales); + + j = 0; + while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { + speed = longhaul_table[j].frequency; + pos = (speed - lowest_speed) / kHz_step + minvid.pos; + f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; + j++; } - /* Current voltage isn't readable at first, so we need to - set it to a known value. The spec says to use maxvid */ - longhaul.bits.RevisionKey = longhaul.bits.RevisionID; /* FIXME: This is bad. */ - longhaul.bits.EnableSoftVID = 1; - longhaul.bits.SoftVID = maxvid; - wrmsrl (MSR_VIA_LONGHAUL, longhaul.val); - - minvid = voltage_table[minvid]; - maxvid = voltage_table[maxvid]; - - dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n", - maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales); - can_scale_voltage = 1; } @@ -685,7 +700,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) return ret; if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) && - (dont_scale_voltage==0)) + (scale_voltage != 0)) longhaul_setup_voltagescaling(); policy->governor = CPUFREQ_DEFAULT_GOVERNOR; @@ -773,8 +788,8 @@ static void __exit longhaul_exit(void) kfree(longhaul_table); } -module_param (dont_scale_voltage, int, 0644); -MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor"); +module_param (scale_voltage, int, 0644); +MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(ignore_latency, int, 0644); MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test"); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index d3a95d77ee85..bc4682aad69b 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h @@ -450,17 +450,45 @@ static int __initdata nehemiah_c_eblcr[32] = { * Voltage scales. Div/Mod by 1000 to get actual voltage. * Which scale to use depends on the VRM type in use. */ -static int __initdata vrm85scales[32] = { - 1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700, - 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, - 1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725, - 1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325, + +struct mV_pos { + unsigned short mV; + unsigned short pos; }; -static int __initdata mobilevrmscales[32] = { - 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, - 1600, 1550, 1500, 1450, 1500, 1350, 1300, -1, - 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, - 1075, 1050, 1025, 1000, 975, 950, 925, -1, +static struct mV_pos __initdata vrm85_mV[32] = { + {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, + {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, + {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, + {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, + {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, + {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, + {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, + {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} +}; + +static unsigned char __initdata mV_vrm85[32] = { + 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, + 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, + 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, + 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 +}; + +static struct mV_pos __initdata mobilevrm_mV[32] = { + {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, + {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, + {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, + {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, + {975, 15}, {950, 14}, {925, 13}, {900, 12}, + {875, 11}, {850, 10}, {825, 9}, {800, 8}, + {775, 7}, {750, 6}, {725, 5}, {700, 4}, + {675, 3}, {650, 2}, {625, 1}, {600, 0} +}; + +static unsigned char __initdata mV_mobilevrm[32] = { + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; From 8adcc0c674004c0f9467031a93dc639c2b01411f Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 1 Sep 2006 14:02:24 -0700 Subject: [PATCH 09/11] [CPUFREQ] Workaround for BIOS bug in software coordination of frequency Some buggy BIOSes do a "software any" kind of coordination without telling about it to OS. So, when OS sets frequency on one CPU on these platforms, it will also impact all the other logical CPUs that are in the same power domain. Attached patch is a workaround for those buggy BIOSes. Patch should be a noop on the normal non-buggy platforms. Applies over previously sent acpi-cpufreq and software coordination bug fix patch Signed-off-by: Denis Sadykov Signed-off-by: Venkatesh Pallipadi Signed-off-by: Alexey Starikovskiy Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 39 ++++++++++++++++- .../kernel/cpu/cpufreq/speedstep-centrino.c | 42 ++++++++++++++++++- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index e6ea00edcb54..ea19d091fd41 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -32,6 +32,7 @@ #include #include #include /* current */ +#include #include #include #include @@ -387,6 +388,33 @@ static int acpi_cpufreq_early_init_acpi(void) return acpi_processor_preregister_performance(acpi_perf_data); } +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; + +static int __init sw_any_bug_found(struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + +static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; + static int acpi_cpufreq_cpu_init ( struct cpufreq_policy *policy) @@ -422,8 +450,17 @@ acpi_cpufreq_cpu_init ( * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = perf->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index b77f1358bd79..dba6bb28d298 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -23,6 +23,7 @@ #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI #include +#include #include #endif @@ -377,6 +378,35 @@ static int centrino_cpu_early_init_acpi(void) return 0; } + +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; +static int __init sw_any_bug_found(struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + + +static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; + + /* * centrino_cpu_init_acpi - register with ACPI P-States library * @@ -398,14 +428,24 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) dprintk(PFX "obtaining ACPI data failed\n"); return -EIO; } + policy->shared_type = p->shared_type; /* * Will let policy->cpus know about dependency only when software * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = p->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif /* verify the acpi_data */ if (p->state_count <= 1) { From ddad65df0048e210c93640b59b3bad12701febb6 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 22 Sep 2006 19:15:23 -0400 Subject: [PATCH 10/11] [CPUFREQ] Fix some more CPU hotplug locking. Lukewarm IQ detected in hotplug locking BUG: warning at kernel/cpu.c:38/lock_cpu_hotplug() [] lock_cpu_hotplug+0x42/0x65 [] cpufreq_update_policy+0x25/0xad [] kprobe_flush_task+0x18/0x40 [] schedule+0x63f/0x68b [] __link_module+0x0/0x1f [] __cond_resched+0x16/0x34 [] cond_resched+0x26/0x31 [] wait_for_completion+0x17/0xb1 [] cpufreq_stat_cpu_callback+0x13/0x20 [cpufreq_stats] [] cpufreq_stats_init+0x74/0x8b [cpufreq_stats] [] sys_init_module+0x91/0x174 [] sysenter_past_esp+0x56/0x79 As there are other places that call cpufreq_update_policy without the hotplug lock, it seems better to keep the hotplug locking at the lower level for the time being until this is revamped. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_stats.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 25eee5394201..c2ecc599dc5f 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -350,12 +350,10 @@ __init cpufreq_stats_init(void) } register_hotcpu_notifier(&cpufreq_stat_cpu_notifier); - lock_cpu_hotplug(); for_each_online_cpu(cpu) { cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE, (void *)(long)cpu); } - unlock_cpu_hotplug(); return 0; } static void From 24669f7d00d387799fc6a39452ab22d7f078f043 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 12 Sep 2006 18:55:53 -0700 Subject: [PATCH 11/11] [CPUFREQ] sw_any_bug_dmi_table can be used on resume, so it isn't initdata sw_any_bug_dmi_table can be used on resume, so it isn't initdata. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index dba6bb28d298..7a9325349e94 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -393,7 +393,7 @@ static int __init sw_any_bug_found(struct dmi_system_id *d) } -static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { +static struct dmi_system_id sw_any_bug_dmi_table[] = { { .callback = sw_any_bug_found, .ident = "Supermicro Server X6DLP",