From 69bb2600c9f8ca450fede9633edf9c2513c9ee6f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 15:33:18 +0100 Subject: [PATCH 1/6] cputime: Fix nsecs_to_cputime() return type cast Even though nsec based cputime_t maps to u64, nsecs_to_cputime() must return a cputime_t value. We want to enforce this kind of cast in order to track down buggy manipulations of cputime_t such as direct access of its values under wrong assumptions on its backend type (nsecs, jiffies, etc...) by core code. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- include/asm-generic/cputime_nsecs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 2c9e62c2bfd0..768294f8bf85 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -44,7 +44,8 @@ typedef u64 __nocast cputime64_t; /* * Convert cputime <-> nanoseconds */ -#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) +#define nsecs_to_cputime(__nsecs) \ + (__force cputime_t)(__nsecs) /* From bfc3f0281e08066fa8111c3972cff6edc1049864 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:33:42 +0100 Subject: [PATCH 2/6] cputime: Default implementation of nsecs -> cputime conversion The architectures that override cputime_t (s390, ppc) don't provide any version of nsecs_to_cputime(). Indeed this cputime_t implementation by backend only happens when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y under which the core code doesn't make any use of nsecs_to_cputime(). At least for now. We are going to make a broader use of it so lets provide a default version with a per usecs granularity. It should be good enough for most usecases. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- drivers/cpufreq/cpufreq_stats.c | 2 +- drivers/s390/cio/cio.c | 2 +- fs/proc/stat.c | 2 +- fs/proc/uptime.c | 2 +- include/linux/cputime.h | 11 +++++++++++ include/linux/kernel_stat.h | 2 +- include/linux/sched.h | 2 +- 7 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 include/linux/cputime.h diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5793e1447fb1..79911a27a48a 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include static spinlock_t cpufreq_stats_lock; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 88e35d85d205..5154513de112 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c62f0cc..9d231e9e5f0e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #ifndef arch_irq_stat_cpu diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 7141b8d0ca9e..33de567c25af 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include static int uptime_proc_show(struct seq_file *m, void *v) { diff --git a/include/linux/cputime.h b/include/linux/cputime.h new file mode 100644 index 000000000000..2842ebe2844d --- /dev/null +++ b/include/linux/cputime.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_CPUTIME_H +#define __LINUX_CPUTIME_H + +#include + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) \ + usecs_to_cputime((__nsecs) / NSEC_PER_USEC) +#endif + +#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be4a7c3..d7c61317db86 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include /* * 'kernel_stat.h' contains the definitions needed for doing diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84463a0..1ac566c48d3d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,7 +27,7 @@ struct sched_param { #include #include -#include +#include #include #include From d8a9ce3f8ad2b546b9ebaf65de809da0793f11c5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:22:37 +0100 Subject: [PATCH 3/6] cputime: Bring cputime -> nsecs conversion We already have nsecs_to_cputime(). Now we need to be able to convert the other way around in order to fix a bug on steal time accounting. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- include/asm-generic/cputime_jiffies.h | 4 +++- include/asm-generic/cputime_nsecs.h | 2 ++ include/linux/cputime.h | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index 272ecba9f588..d5cb78f53986 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -15,8 +15,10 @@ typedef u64 __nocast cputime64_t; /* - * Convert nanoseconds to cputime + * Convert nanoseconds <-> cputime */ +#define cputime_to_nsecs(__ct) \ + jiffies_to_nsecs(cputime_to_jiffies(__ct)) #define nsecs_to_cputime64(__nsec) \ jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) #define nsecs_to_cputime(__nsec) \ diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 768294f8bf85..4e817606c549 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -44,6 +44,8 @@ typedef u64 __nocast cputime64_t; /* * Convert cputime <-> nanoseconds */ +#define cputime_to_nsecs(__ct) \ + (__force u64)(__ct) #define nsecs_to_cputime(__nsecs) \ (__force cputime_t)(__nsecs) diff --git a/include/linux/cputime.h b/include/linux/cputime.h index 2842ebe2844d..f2eb2ee535ca 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -3,6 +3,11 @@ #include +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif + #ifndef nsecs_to_cputime # define nsecs_to_cputime(__nsecs) \ usecs_to_cputime((__nsecs) / NSEC_PER_USEC) From dee08a72deefac251267ed2717717596aa8b6818 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:02:22 +0100 Subject: [PATCH 4/6] cputime: Fix jiffies based cputime assumption on steal accounting The steal guest time accounting code assumes that cputime_t is based on jiffies. So when CONFIG_NO_HZ_FULL=y, which implies that cputime_t is based on nsecs, steal_account_process_tick() passes the delta in jiffies to account_steal_time() which then accounts it as if it's a value in nsecs. As a result, accounting 1 second of steal time (with HZ=100 that would be 100 jiffies) is spuriously accounted as 100 nsecs. As such /proc/stat may report 0 values of steal time even when two guests have run concurrently for a few seconds on the same host and same CPU. In order to fix this, lets convert the nsecs based steal delta to cputime instead of jiffies by using the right conversion API. Given that the steal time is stored in cputime_t and this type can have a smaller granularity than nsecs, we only account the rounded converted value and leave the remaining nsecs for the next deltas. Reported-by: Huiqingding Reported-by: Marcelo Tosatti Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- kernel/sched/cputime.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 99947919e30b..c91b09770ebd 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -258,16 +258,22 @@ static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT if (static_key_false(¶virt_steal_enabled)) { - u64 steal, st = 0; + u64 steal; + cputime_t steal_ct; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; - st = steal_ticks(steal); - this_rq()->prev_steal_time += st * TICK_NSEC; + /* + * cputime_t may be less precise than nsecs (eg: if it's + * based on jiffies). Lets cast the result to cputime + * granularity and account the rest on the next rounds. + */ + steal_ct = nsecs_to_cputime(steal); + this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct); - account_steal_time(st); - return st; + account_steal_time(steal_ct); + return steal_ct; } #endif return false; From 300a9d887ea221f344962506f724e02101bacc08 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:05:57 +0100 Subject: [PATCH 5/6] sched: Remove needless round trip nsecs <-> tick conversion of steal time When update_rq_clock_task() accounts the pending steal time for a task, it converts the steal delta from nsecs to tick then from tick to nsecs. There is no apparent good reason for doing that though because both the task clock and the prev steal delta are u64 and store values in nsecs. So lets remove the needless conversion. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- kernel/sched/core.c | 6 ------ kernel/sched/sched.h | 10 ---------- 2 files changed, 16 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131ef6aab..b14a188af898 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -823,19 +823,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING if (static_key_false((¶virt_steal_rq_enabled))) { - u64 st; - steal = paravirt_steal_clock(cpu_of(rq)); steal -= rq->prev_steal_time_rq; if (unlikely(steal > delta)) steal = delta; - st = steal_ticks(steal); - steal = st * TICK_NSEC; - rq->prev_steal_time_rq += steal; - delta -= steal; } #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd20f8b..5ec991010122 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1214,16 +1214,6 @@ extern void update_idle_cpu_load(struct rq *this_rq); extern void init_task_runnable_average(struct task_struct *p); -#ifdef CONFIG_PARAVIRT -static inline u64 steal_ticks(u64 steal) -{ - if (unlikely(steal > NSEC_PER_SEC)) - return div_u64(steal, TICK_NSEC); - - return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -} -#endif - static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; From 073d8224d299528778e90773becd1e890953443c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:24:12 +0100 Subject: [PATCH 6/6] arch: Remove stub cputime.h headers Many architectures have a stub cputime.h that only include the default cputime.h Lets remove the useless headers, we only need to mention that we want the default headers on the Kbuild files. Cc: Archs Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- arch/alpha/include/asm/Kbuild | 1 + arch/alpha/include/asm/cputime.h | 6 ------ arch/cris/include/asm/Kbuild | 1 + arch/cris/include/asm/cputime.h | 6 ------ arch/frv/include/asm/Kbuild | 1 + arch/frv/include/asm/cputime.h | 6 ------ arch/m32r/include/asm/Kbuild | 1 + arch/m32r/include/asm/cputime.h | 6 ------ arch/microblaze/include/asm/Kbuild | 1 + arch/microblaze/include/asm/cputime.h | 1 - arch/mn10300/include/asm/Kbuild | 1 + arch/mn10300/include/asm/cputime.h | 1 - arch/score/include/asm/Kbuild | 1 + arch/score/include/asm/cputime.h | 6 ------ arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/cputime.h | 1 - 16 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 arch/alpha/include/asm/cputime.h delete mode 100644 arch/cris/include/asm/cputime.h delete mode 100644 arch/frv/include/asm/cputime.h delete mode 100644 arch/m32r/include/asm/cputime.h delete mode 100644 arch/microblaze/include/asm/cputime.h delete mode 100644 arch/mn10300/include/asm/cputime.h delete mode 100644 arch/score/include/asm/cputime.h delete mode 100644 arch/x86/include/asm/cputime.h diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 7736f426ff3b..96e54bed5088 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/alpha/include/asm/cputime.h b/arch/alpha/include/asm/cputime.h deleted file mode 100644 index 19577fd93230..000000000000 --- a/arch/alpha/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ALPHA_CPUTIME_H -#define __ALPHA_CPUTIME_H - -#include - -#endif /* __ALPHA_CPUTIME_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 056027f38351..afff5105909d 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += arch-v32/ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h diff --git a/arch/cris/include/asm/cputime.h b/arch/cris/include/asm/cputime.h deleted file mode 100644 index 4446a65656fa..000000000000 --- a/arch/cris/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CRIS_CPUTIME_H -#define __CRIS_CPUTIME_H - -#include - -#endif /* __CRIS_CPUTIME_H */ diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index babb9338ebf8..87b95eb8aee5 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/cputime.h b/arch/frv/include/asm/cputime.h deleted file mode 100644 index f6c373ad2b80..000000000000 --- a/arch/frv/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_CPUTIME_H -#define _ASM_CPUTIME_H - -#include - -#endif /* _ASM_CPUTIME_H */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 5825a35b2c56..67779a74b62d 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/m32r/include/asm/cputime.h b/arch/m32r/include/asm/cputime.h deleted file mode 100644 index 0a47550df2b7..000000000000 --- a/arch/m32r/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __M32R_CPUTIME_H -#define __M32R_CPUTIME_H - -#include - -#endif /* __M32R_CPUTIME_H */ diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 1f590ab8f323..c98ed95c0541 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/microblaze/include/asm/cputime.h b/arch/microblaze/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/microblaze/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index cbc6b9bf45da..654d5ba6e310 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/mn10300/include/asm/cputime.h b/arch/mn10300/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/mn10300/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 4630cf217b5b..2f947aba4bd4 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,6 +4,7 @@ header-y += generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/score/include/asm/cputime.h b/arch/score/include/asm/cputime.h deleted file mode 100644 index 1fced99f0d67..000000000000 --- a/arch/score/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_CPUTIME_H -#define _ASM_SCORE_CPUTIME_H - -#include - -#endif /* _ASM_SCORE_CPUTIME_H */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a8fee078b92f..4acddc43ee0c 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,5 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += cputime.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/x86/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include