From 3424dabb6508d538e9ec1a2aa889fefbd83df2d0 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 14:05:53 +0000 Subject: [PATCH 01/18] metag: perf: fix core internal / perf channel mux The value written to the PERF_ICOREx or PERF_CHANx register to select the performance events for the core internal and perf channel events was (tmp & 0x0f), but tmp was set to (config & 0xf0) so it would always be 0. Correct it to use config instead of tmp. Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index a876d5ff3897..f38bf6d4dc55 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -634,7 +634,7 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) break; } - metag_out32((tmp & 0x0f), perf_addr); + metag_out32((config & 0x0f), perf_addr); /* * Now we use the high nibble as the performance event to From c43ca04b5e7854b3996f84a495e4553941e76266 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 16:16:38 +0000 Subject: [PATCH 02/18] metag: perf: fix wrap handling in delta calculation When calculating the delta, mask with MAX_PERIOD (24 bits) to handle wrapping, which particularly happens with periodic sampling since the value is intentionally set so that it will overflow soon. Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index f38bf6d4dc55..8096db2a550b 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -211,7 +211,7 @@ again: /* * Calculate the delta and add it to the counter. */ - delta = new_raw_count - prev_raw_count; + delta = (new_raw_count - prev_raw_count) & MAX_PERIOD; local64_add(delta, &event->count); } From db59932f62386cdfd8510c27a83118c5e915e9ea Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 16:16:38 +0000 Subject: [PATCH 03/18] metag: perf: fixes for interrupting perf counters The overflow handler needs to read modify write when re-enabling the counter so as not to change the counter value as it may have been changed to ready the next interrupt on overflow. Similarly for interrupting counters metag_pmu_enable_counter needs to leave the counter value unchanged rather than resetting it to zero. Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 8096db2a550b..a00f527eade5 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -643,13 +643,15 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) config = tmp >> 4; } - /* - * Enabled counters start from 0. Early cores clear the count on - * write but newer cores don't, so we make sure that the count is - * set to 0. 
- */ tmp = ((config & 0xf) << 28) | ((1 << 24) << cpu_2_hwthread_id[get_cpu()]); + if (metag_pmu->max_period) + /* + * Cores supporting overflow interrupts may have had the counter + * set to a specific value that needs preserving. + */ + tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff; + metag_out32(tmp, PERF_COUNT(idx)); unlock: raw_spin_unlock_irqrestore(&events->pmu_lock, flags); @@ -764,10 +766,16 @@ static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev) /* * Enable the counter again once core overflow processing has - * completed. + * completed. Note the counter value may have been modified while it was + * inactive to set it up ready for the next interrupt. */ - if (!perf_event_overflow(event, &sampledata, regs)) + if (!perf_event_overflow(event, &sampledata, regs)) { + __global_lock2(flags); + counter = (counter & 0xff000000) | + (metag_in32(PERF_COUNT(idx)) & 0x00ffffff); metag_out32(counter, PERF_COUNT(idx)); + __global_unlock2(flags); + } return IRQ_HANDLED; } From c6ac1e6edacc7e1fb0405d61f95a797c6a712411 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 16:16:38 +0000 Subject: [PATCH 04/18] metag: perf: add missing prev_count updates The prev_count needs setting when changing the counter value, otherwise the calculated delta will be wrong, which for frequency sampling (dynamic period sampling) results in sampling at too high a frequency. For non-interrupting performance counters it should also be cleared when enabling the counter since the write to the PERF_COUNT register will clear the perf counter. This also includes a minor change to remove the u64 cast from the metag_pmu->write() call as metag_pmu->write() takes a u32 anyway, and in any case GCC is smart enough to optimise away the cast. Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index a00f527eade5..5bf984feaaa1 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -240,8 +240,10 @@ int metag_pmu_event_set_period(struct perf_event *event, if (left > (s64)metag_pmu->max_period) left = metag_pmu->max_period; - if (metag_pmu->write) - metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD); + if (metag_pmu->write) { + local64_set(&hwc->prev_count, -(s32)left); + metag_pmu->write(idx, -left & MAX_PERIOD); + } perf_event_update_userpage(event); @@ -651,6 +653,12 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) * set to a specific value that needs preserving. */ tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff; + else + /* + * Older cores reset the counter on write, so prev_count needs + * resetting too so we can calculate a correct delta. + */ + local64_set(&event->prev_count, 0); metag_out32(tmp, PERF_COUNT(idx)); unlock: From 2033dc54e6fffac01f6129c0038c2e78102cf59a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 16:16:38 +0000 Subject: [PATCH 05/18] metag: perf: fix frequency sampling (dynamic period) Frequency sampling mode dynamically adjusts the sample period so as to hit a particular frequency of samples. The sample period starts at just 1 and then gets increased if the interrupt rate is too high. This changed sample period needs handling in metag_pmu_event_set_period to update period_left (as the ARM equivalent does). 
The calculated delta also needs subtracting from period_left in metag_pmu_event_update in order to hit the conditional blocks in metag_pmu_event_set_period which update last_period (which is used in the dynamic sampling period calculation). Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 5bf984feaaa1..6210126de78a 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -214,6 +214,7 @@ again: delta = (new_raw_count - prev_raw_count) & MAX_PERIOD; local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); } int metag_pmu_event_set_period(struct perf_event *event, @@ -223,6 +224,10 @@ int metag_pmu_event_set_period(struct perf_event *event, s64 period = hwc->sample_period; int ret = 0; + /* The period may have been changed */ + if (unlikely(period != hwc->last_period)) + left += period - hwc->last_period; + if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); From 9344de1b1c64b2217f3b15508266deff2a8d6c84 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 16:48:42 +0000 Subject: [PATCH 06/18] metag: perf: use hard_processor_id() to get thread Use hard_processor_id() to get the current thread number rather than get_cpu() and the hardware thread mapping. There was no matching put_cpu(), and in any case this should be slightly more efficient. Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 6210126de78a..54fde35b4b9c 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -22,9 +22,9 @@ #include #include -#include #include #include +#include #include "perf_event.h" @@ -651,7 +651,7 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) } tmp = ((config & 0xf) << 28) | - ((1 << 24) << cpu_2_hwthread_id[get_cpu()]); + ((1 << 24) << hard_processor_id()); if (metag_pmu->max_period) /* * Cores supporting overflow interrupts may have had the counter From 1fb4dc5c39af941d3abc597337e0ea776bfce0f2 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Feb 2013 21:54:51 +0000 Subject: [PATCH 07/18] metag: perf: don't reset TXTACTCYC The thread active cycle counter TXTACTCYC is used in __delay so it shouldn't really be reset to zero by perf. Fix perf to just read the value, and instead of clearing it, record the prev_count value in enable_counter so that the delta calculations know about the previous value. 
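As a rough illustration of the bookkeeping that the delta/prev_count fixes above rely on, here is a minimal user-space sketch of the wrap-safe delta calculation (illustrative only, not part of any patch; MAX_PERIOD is assumed to be the 24-bit counter mask described earlier, and per the frequency sampling fix the same delta is also subtracted from period_left):

  #include <stdio.h>
  #include <stdint.h>

  #define MAX_PERIOD 0x00ffffff	/* assumed 24-bit counter mask */

  /*
   * Wrap-safe delta: the hardware counter is only 24 bits wide, so the
   * difference between two reads is masked in case the counter wrapped
   * past zero since prev_count was recorded.
   */
  static uint32_t counter_delta(uint32_t prev_count, uint32_t new_count)
  {
  	return (new_count - prev_count) & MAX_PERIOD;
  }

  int main(void)
  {
  	/* prev_count near the top of the 24-bit range, new_count past the wrap */
  	printf("delta = %u\n", (unsigned)counter_delta(0x00fffff0, 0x00000010));	/* prints 32 */
  	return 0;
  }

The masking is what keeps periodic sampling correct even though the counter value is deliberately set so that it overflows soon.
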
Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- arch/metag/kernel/perf/perf_event.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 54fde35b4b9c..a1eff3687e8e 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -617,9 +617,7 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) WARN_ONCE((config != 0x100), "invalid configuration (%d) for counter (%d)\n", config, idx); - - /* Reset the cycle count */ - __core_reg_set(TXTACTCYC, 0); + local64_set(&event->prev_count, __core_reg_get(TXTACTCYC)); goto unlock; } @@ -708,9 +706,8 @@ static u64 metag_pmu_read_counter(int idx) { u32 tmp = 0; - /* The act of reading the cycle counter also clears it */ if (METAG_INST_COUNTER == idx) { - __core_reg_swap(TXTACTCYC, tmp); + tmp = __core_reg_get(TXTACTCYC); goto out; } From f27086f5dcb0c7e9622f724d5279e4dfe4e844a2 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 21 Feb 2013 15:34:59 +0000 Subject: [PATCH 08/18] metag: perf: prepare for use by oprofile To allow our perf_events code to work with oprofile the PERF_TYPE_RAW event type attribute is implemented, which allows the internal encoding of events to be used externally (this requires some tweaks so that it handles invalid event types more gracefully), and perf_pmu_name() is adjusted to return metag_pmu->name instead of metag_pmu->pmu.name (which is changed to "meta2"). Signed-off-by: James Hogan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Robert Richter Cc: oprofile-list@lists.sf.net --- arch/metag/kernel/perf/perf_event.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index a1eff3687e8e..366569425c52 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -40,10 +40,10 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); /* PMU admin */ const char *perf_pmu_name(void) { - if (metag_pmu) - return metag_pmu->pmu.name; + if (!metag_pmu) + return NULL; - return NULL; + return metag_pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); @@ -171,6 +171,7 @@ static int metag_pmu_event_init(struct perf_event *event) switch (event->attr.type) { case PERF_TYPE_HARDWARE: case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: err = _hw_perf_event_init(event); break; @@ -556,6 +557,10 @@ static int _hw_perf_event_init(struct perf_event *event) if (err) return err; break; + + case PERF_TYPE_RAW: + mapping = attr->config; + break; } /* Return early if the event is unsupported */ @@ -623,7 +628,7 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) /* Check for a core internal or performance channel event. 
 */
 	if (tmp) {
-		void *perf_addr = (void *)PERF_COUNT(idx);
+		void *perf_addr;
 
 		/*
 		 * Anything other than a cycle count will write the low-
@@ -637,9 +642,14 @@
 		case 0xf0:
 			perf_addr = (void *)PERF_CHAN(idx);
 			break;
+
+		default:
+			perf_addr = NULL;
+			break;
 		}
 
-		metag_out32((config & 0x0f), perf_addr);
+		if (perf_addr)
+			metag_out32((config & 0x0f), perf_addr);
 
 		/*
 		 * Now we use the high nibble as the performance event to
@@ -848,7 +858,7 @@ static int __init init_hw_perf_events(void)
 			metag_pmu->max_period = 0;
 		}
 
-		metag_pmu->name = "Meta 2";
+		metag_pmu->name = "meta2";
 		metag_pmu->version = version;
 		metag_pmu->pmu = pmu;
 	}

From 00e6c92304ce38ff48029471c929d31a25e5cf10 Mon Sep 17 00:00:00 2001
From: James Hogan
Date: Fri, 15 Mar 2013 10:21:56 +0000
Subject: [PATCH 09/18] metag: OProfile support

Add OProfile support for metag, using the perf backend, and falling
back to generic timer-based sampling if perf counter interrupt support
is disabled.

The oprofile code prepends "metag/" to the perf pmu name to give
"metag/meta2", which is more consistent with other oprofile arch names.

The backtrace code makes use of the kernel's stack frame walker
(walk_stackframe()) for kernel backtracing, and a simple frame pointer
walk for userland backtracing.

Signed-off-by: James Hogan
Cc: Robert Richter
Cc: oprofile-list@lists.sf.net
---
 arch/metag/Kconfig              |  4 ++
 arch/metag/Makefile             |  2 +
 arch/metag/oprofile/Makefile    | 17 +++++++++
 arch/metag/oprofile/backtrace.c | 63 +++++++++++++++++++++++++++++++
 arch/metag/oprofile/backtrace.h |  6 +++
 arch/metag/oprofile/common.c    | 66 +++++++++++++++++++++++++++++++++
 6 files changed, 158 insertions(+)
 create mode 100644 arch/metag/oprofile/Makefile
 create mode 100644 arch/metag/oprofile/backtrace.c
 create mode 100644 arch/metag/oprofile/backtrace.h
 create mode 100644 arch/metag/oprofile/common.c

diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index afc8973d1488..b06b41861aac 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -25,6 +25,7 @@ config METAG
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
@@ -209,6 +210,9 @@ config METAG_PERFCOUNTER_IRQS
 	  When disabled, Performance Counters information will be collected
 	  based on Timer Interrupt.
 
+config HW_PERF_EVENTS + def_bool METAG_PERFCOUNTER_IRQS && PERF_EVENTS + config METAG_DA bool "DA support" help diff --git a/arch/metag/Makefile b/arch/metag/Makefile index 81bd6a1c7483..b566116b171b 100644 --- a/arch/metag/Makefile +++ b/arch/metag/Makefile @@ -49,6 +49,8 @@ core-y += arch/metag/mm/ libs-y += arch/metag/lib/ libs-y += arch/metag/tbx/ +drivers-$(CONFIG_OPROFILE) += arch/metag/oprofile/ + boot := arch/metag/boot boot_targets += uImage diff --git a/arch/metag/oprofile/Makefile b/arch/metag/oprofile/Makefile new file mode 100644 index 000000000000..c9639d4734d6 --- /dev/null +++ b/arch/metag/oprofile/Makefile @@ -0,0 +1,17 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +oprofile-core-y += buffer_sync.o +oprofile-core-y += cpu_buffer.o +oprofile-core-y += event_buffer.o +oprofile-core-y += oprof.o +oprofile-core-y += oprofile_files.o +oprofile-core-y += oprofile_stats.o +oprofile-core-y += oprofilefs.o +oprofile-core-y += timer_int.o +oprofile-core-$(CONFIG_HW_PERF_EVENTS) += oprofile_perf.o + +oprofile-y += backtrace.o +oprofile-y += common.o +oprofile-y += $(addprefix ../../../drivers/oprofile/,$(oprofile-core-y)) + +ccflags-y += -Werror diff --git a/arch/metag/oprofile/backtrace.c b/arch/metag/oprofile/backtrace.c new file mode 100644 index 000000000000..7cc3f37cb40e --- /dev/null +++ b/arch/metag/oprofile/backtrace.c @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2010-2013 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include + +#include "backtrace.h" + +static void user_backtrace_fp(unsigned long __user *fp, unsigned int depth) +{ + while (depth-- && access_ok(VERIFY_READ, fp, 8)) { + unsigned long addr; + unsigned long __user *fpnew; + if (__copy_from_user_inatomic(&addr, fp + 1, sizeof(addr))) + break; + addr -= 4; + + oprofile_add_trace(addr); + + /* stack grows up, so frame pointers must decrease */ + if (__copy_from_user_inatomic(&fpnew, fp + 0, sizeof(fpnew))) + break; + if (fpnew >= fp) + break; + fp = fpnew; + } +} + +static int kernel_backtrace_frame(struct stackframe *frame, void *data) +{ + unsigned int *depth = data; + + oprofile_add_trace(frame->pc); + + /* decrement depth and stop if we reach 0 */ + if ((*depth)-- == 0) + return 1; + + /* otherwise onto the next frame */ + return 0; +} + +void metag_backtrace(struct pt_regs * const regs, unsigned int depth) +{ + if (user_mode(regs)) { + unsigned long *fp = (unsigned long *)regs->ctx.AX[1].U0; + user_backtrace_fp((unsigned long __user __force *)fp, depth); + } else { + struct stackframe frame; + frame.fp = regs->ctx.AX[1].U0; /* A0FrP */ + frame.sp = user_stack_pointer(regs); /* A0StP */ + frame.lr = 0; /* from stack */ + frame.pc = regs->ctx.CurrPC; /* PC */ + walk_stackframe(&frame, &kernel_backtrace_frame, &depth); + } +} diff --git a/arch/metag/oprofile/backtrace.h b/arch/metag/oprofile/backtrace.h new file mode 100644 index 000000000000..c0fcc4265abb --- /dev/null +++ b/arch/metag/oprofile/backtrace.h @@ -0,0 +1,6 @@ +#ifndef _METAG_OPROFILE_BACKTRACE_H +#define _METAG_OPROFILE_BACKTRACE_H + +void metag_backtrace(struct pt_regs * const regs, unsigned int depth); + +#endif diff --git a/arch/metag/oprofile/common.c b/arch/metag/oprofile/common.c new file mode 100644 index 000000000000..ba26152b3c00 --- /dev/null +++ b/arch/metag/oprofile/common.c @@ -0,0 +1,66 @@ +/* + * arch/metag/oprofile/common.c + * + * Copyright 
(C) 2013 Imagination Technologies Ltd. + * + * Based on arch/sh/oprofile/common.c: + * + * Copyright (C) 2003 - 2010 Paul Mundt + * + * Based on arch/mips/oprofile/common.c: + * + * Copyright (C) 2004, 2005 Ralf Baechle + * Copyright (C) 2005 MIPS Technologies, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include + +#include "backtrace.h" + +#ifdef CONFIG_HW_PERF_EVENTS +/* + * This will need to be reworked when multiple PMUs are supported. + */ +static char *metag_pmu_op_name; + +char *op_name_from_perf_id(void) +{ + return metag_pmu_op_name; +} + +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ + ops->backtrace = metag_backtrace; + + if (perf_num_counters() == 0) + return -ENODEV; + + metag_pmu_op_name = kasprintf(GFP_KERNEL, "metag/%s", + perf_pmu_name()); + if (unlikely(!metag_pmu_op_name)) + return -ENOMEM; + + return oprofile_perf_init(ops); +} + +void oprofile_arch_exit(void) +{ + oprofile_perf_exit(); + kfree(metag_pmu_op_name); +} +#else +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ + ops->backtrace = metag_backtrace; + /* fall back to timer interrupt PC sampling */ + return -ENODEV; +} +void oprofile_arch_exit(void) {} +#endif /* CONFIG_HW_PERF_EVENTS */ From 9e7129630329d50b8e8c3403bb71c85a7c3cbe35 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 7 Mar 2013 17:20:53 +0000 Subject: [PATCH 10/18] metag: smp: copy cache partition and enable GCOn When starting an SMP hardware thread, copy the cache partition configuration so that the threads share the same cache partitions. Also enable the GCOn bit if running in the local half of the virtual address space to enable coherency of shared local cache partitions. An atomic unlock system event is executed by the new cpu before any memory is read to ensure that any writes made by the boot cpu prior to full coherency taking effect are visible to the new cpu. This is to allow SMP to work even when the bootloader hasn't configured the caches for coherency. A log message is printed to describe the cache partition changes so that the user is aware of potential unintentional cache wastage if they've configured the cache partitions in the wrong way. Signed-off-by: James Hogan --- arch/metag/include/asm/metag_mem.h | 3 + arch/metag/kernel/head.S | 8 ++ arch/metag/kernel/smp.c | 115 +++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+) diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h index 3f7b54d8ccac..aa5a076df439 100644 --- a/arch/metag/include/asm/metag_mem.h +++ b/arch/metag/include/asm/metag_mem.h @@ -700,6 +700,9 @@ #define SYSC_xCPARTG_AND_S 8 #define SYSC_xCPARTL_OR_BITS 0x000F0000 /* Ors into top 4 bits */ #define SYSC_xCPARTL_OR_S 16 +#ifdef METAC_2_1 +#define SYSC_DCPART_GCON_BIT 0x00100000 /* Coherent shared local */ +#endif /* METAC_2_1 */ #define SYSC_xCPARTG_OR_BITS 0x0F000000 /* Ors into top 4 bits */ #define SYSC_xCPARTG_OR_S 24 #define SYSC_CWRMODE_BIT 0x80000000 /* Write cache mode bit */ diff --git a/arch/metag/kernel/head.S b/arch/metag/kernel/head.S index 969dffabc03a..713f71d1bdfe 100644 --- a/arch/metag/kernel/head.S +++ b/arch/metag/kernel/head.S @@ -1,6 +1,7 @@ ! 
Copyright 2005,2006,2007,2009 Imagination Technologies #include +#include #include #undef __exit @@ -48,6 +49,13 @@ __exit: .global _secondary_startup .type _secondary_startup,function _secondary_startup: +#if CONFIG_PAGE_OFFSET < LINGLOBAL_BASE + ! In case GCOn has just been turned on we need to fence any writes that + ! the boot thread might have performed prior to coherency taking effect. + MOVT D0Re0,#HI(LINSYSEVENT_WR_ATOMIC_UNLOCK) + MOV D1Re0,#0 + SETD [D0Re0], D1Re0 +#endif MOVT A0StP,#HI(_secondary_data_stack) ADD A0StP,A0StP,#LO(_secondary_data_stack) GETD A0StP,[A0StP] diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index 4b6d1f14df32..4e7751ac75d2 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -37,6 +39,9 @@ #include #include +#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n)) +#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n)) + DECLARE_PER_CPU(PTBI, pTBI); void *secondary_data_stack; @@ -99,6 +104,114 @@ int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle) return 0; } +/** + * describe_cachepart_change: describe a change to cache partitions. + * @thread: Hardware thread number. + * @label: Label of cache type, e.g. "dcache" or "icache". + * @sz: Total size of the cache. + * @old: Old cache partition configuration (*CPART* register). + * @new: New cache partition configuration (*CPART* register). + * + * If the cache partition has changed, prints a message to the log describing + * those changes. + */ +static __cpuinit void describe_cachepart_change(unsigned int thread, + const char *label, + unsigned int sz, + unsigned int old, + unsigned int new) +{ + unsigned int lor1, land1, gor1, gand1; + unsigned int lor2, land2, gor2, gand2; + unsigned int diff = old ^ new; + + if (!diff) + return; + + pr_info("Thread %d: %s partition changed:", thread, label); + if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) { + lor1 = (old & SYSC_xCPARTL_OR_BITS) >> SYSC_xCPARTL_OR_S; + lor2 = (new & SYSC_xCPARTL_OR_BITS) >> SYSC_xCPARTL_OR_S; + land1 = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S; + land2 = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S; + pr_cont(" L:%#x+%#x->%#x+%#x", + (lor1 * sz) >> 4, + ((land1 + 1) * sz) >> 4, + (lor2 * sz) >> 4, + ((land2 + 1) * sz) >> 4); + } + if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) { + gor1 = (old & SYSC_xCPARTG_OR_BITS) >> SYSC_xCPARTG_OR_S; + gor2 = (new & SYSC_xCPARTG_OR_BITS) >> SYSC_xCPARTG_OR_S; + gand1 = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S; + gand2 = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S; + pr_cont(" G:%#x+%#x->%#x+%#x", + (gor1 * sz) >> 4, + ((gand1 + 1) * sz) >> 4, + (gor2 * sz) >> 4, + ((gand2 + 1) * sz) >> 4); + } + if (diff & SYSC_CWRMODE_BIT) + pr_cont(" %sWR", + (new & SYSC_CWRMODE_BIT) ? "+" : "-"); + if (diff & SYSC_DCPART_GCON_BIT) + pr_cont(" %sGCOn", + (new & SYSC_DCPART_GCON_BIT) ? "+" : "-"); + pr_cont("\n"); +} + +/** + * setup_smp_cache: ensure cache coherency for new SMP thread. + * @thread: New hardware thread number. + * + * Ensures that coherency is enabled and that the threads share the same cache + * partitions. 
+ */
+static __cpuinit void setup_smp_cache(unsigned int thread)
+{
+	unsigned int this_thread, lflags;
+	unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
+	unsigned int icsz, icpart_old, icpart_new;
+
+	/*
+	 * Copy over the current thread's cache partition configuration to the
+	 * new thread so that they share cache partitions.
+	 */
+	__global_lock2(lflags);
+	this_thread = hard_processor_id();
+	/* Share dcache partition */
+	dcpart_this = metag_in32(SYSC_DCPART(this_thread));
+	dcpart_old = metag_in32(SYSC_DCPART(thread));
+	dcpart_new = dcpart_this;
+#if PAGE_OFFSET < LINGLOBAL_BASE
+	/*
+	 * For the local data cache to be coherent the threads must also have
+	 * GCOn enabled.
+	 */
+	dcpart_new |= SYSC_DCPART_GCON_BIT;
+	metag_out32(dcpart_new, SYSC_DCPART(this_thread));
+#endif
+	metag_out32(dcpart_new, SYSC_DCPART(thread));
+	/* Share icache partition too */
+	icpart_new = metag_in32(SYSC_ICPART(this_thread));
+	icpart_old = metag_in32(SYSC_ICPART(thread));
+	metag_out32(icpart_new, SYSC_ICPART(thread));
+	__global_unlock2(lflags);
+
+	/*
+	 * Log if the cache partitions were altered so the user is aware of any
+	 * potential unintentional cache wastage.
+	 */
+	dcsz = get_dcache_size();
+	icsz = get_icache_size();
+	describe_cachepart_change(this_thread, "dcache", dcsz,
+				  dcpart_this, dcpart_new);
+	describe_cachepart_change(thread, "dcache", dcsz,
+				  dcpart_old, dcpart_new);
+	describe_cachepart_change(thread, "icache", icsz,
+				  icpart_old, icpart_new);
+}
+
 int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	unsigned int thread = cpu_2_hwthread_id[cpu];
@@ -108,6 +221,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 
 	flush_tlb_all();
 
+	setup_smp_cache(thread);
+
 	/*
 	 * Tell the secondary CPU where to find its idle thread's stack.
 	 */

From 4d8edbfefb630559220939ad5a3bdd8a75190cc3 Mon Sep 17 00:00:00 2001
From: James Hogan
Date: Fri, 8 Mar 2013 15:27:49 +0000
Subject: [PATCH 11/18] metag: cachepart: take into account small cache bits

The CORE_CONFIG2 register has bits to indicate that the data or code
cache is small, i.e. that the size described in the field should be
divided by 64. Take this into account in get_icache_size() and
get_dcache_size().
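
A worked example of the scaling (the field value below is invented for illustration and not taken from a real CORE_CONFIG2 encoding): a DCSZ field of 4 encodes a nominal 0x1000 << 4 = 64KiB cache, and if the "small" bit is also set the real size is 64KiB / 64 = 1KiB:

  #include <stdio.h>

  int main(void)
  {
  	unsigned int dcsz_field = 4;		/* invented example field value */
  	unsigned int sz = 0x1000 << dcsz_field;	/* nominal size: 65536 bytes */
  	int small_bit_set = 1;			/* cache flagged as "small" */

  	if (small_bit_set)
  		sz >>= 6;			/* divide by 64: 1024 bytes */

  	printf("effective cache size: %u bytes\n", sz);
  	return 0;
  }
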
Signed-off-by: James Hogan --- arch/metag/kernel/cachepart.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c index 3a589dfb966b..c737edb5b2d5 100644 --- a/arch/metag/kernel/cachepart.c +++ b/arch/metag/kernel/cachepart.c @@ -24,15 +24,21 @@ unsigned int get_dcache_size(void) { unsigned int config2 = metag_in32(METAC_CORE_CONFIG2); - return 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS) - >> METAC_CORECFG2_DCSZ_S); + unsigned int sz = 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS) + >> METAC_CORECFG2_DCSZ_S); + if (config2 & METAC_CORECFG2_DCSMALL_BIT) + sz >>= 6; + return sz; } unsigned int get_icache_size(void) { unsigned int config2 = metag_in32(METAC_CORE_CONFIG2); - return 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS) - >> METAC_CORE_C2ICSZ_S); + unsigned int sz = 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS) + >> METAC_CORE_C2ICSZ_S); + if (config2 & METAC_CORECFG2_ICSMALL_BIT) + sz >>= 6; + return sz; } unsigned int get_global_dcache_size(void) From b7fb9e6a48b13e1edea1f0b1f00a45defd7c9e0e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 8 Mar 2013 15:30:01 +0000 Subject: [PATCH 12/18] metag: cachepart: fix get_global_dcache_size() typo Compilation is broken when the kernel is destined to live in the global part of the virtual address space: arch/metag/kernel/cachepart.c In function 'get_thread_cache_size': arch/metag/kernel/cachepart.c +71 : error: implicit declaration of function 'get_global_dache_size' Fix the typo. Signed-off-by: James Hogan --- arch/metag/kernel/cachepart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c index c737edb5b2d5..954548b1bea8 100644 --- a/arch/metag/kernel/cachepart.c +++ b/arch/metag/kernel/cachepart.c @@ -67,7 +67,7 @@ static unsigned int get_thread_cache_size(unsigned int cache, int thread_id) return 0; #if PAGE_OFFSET >= LINGLOBAL_BASE /* Checking for global cache */ - cache_size = (cache == DCACHE ? get_global_dache_size() : + cache_size = (cache == DCACHE ? get_global_dcache_size() : get_global_icache_size()); offset = 8; #else From 2b8660ed3bfe95523561e6d6a6f1ce91389006b1 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 20 Mar 2013 11:25:08 +0100 Subject: [PATCH 13/18] memblock: Kill ARCH_POPULATES_NODE_MAP once more The Kconfig symbol ARCH_POPULATES_NODE_MAP was killed in v3.3. After that it popped up again in microblaze and metag. Nobody noticed, probably because these Kconfig symbols are entirely unused and these architectures both select HAVE_MEMBLOCK_NODE_MAP. Anyhow, these two entries can also be killed. 
Signed-off-by: Paul Bolle Acked-by: Michal Simek Signed-off-by: James Hogan --- arch/metag/mm/Kconfig | 3 --- arch/microblaze/Kconfig | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig index 975f2f4e3ecf..794f26a187f9 100644 --- a/arch/metag/mm/Kconfig +++ b/arch/metag/mm/Kconfig @@ -98,9 +98,6 @@ config MAX_ACTIVE_REGIONS default "2" if SPARSEMEM default "1" -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 7843d11156e6..9dbb2448a9b2 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -38,9 +38,6 @@ config RWSEM_GENERIC_SPINLOCK config ZONE_DMA def_bool y -config ARCH_POPULATES_NODE_MAP - def_bool y - config RWSEM_XCHGADD_ALGORITHM bool From 876d6dcdf26a2e860801ec61195e580d03f7b204 Mon Sep 17 00:00:00 2001 From: Paul Clothier Date: Tue, 19 Mar 2013 12:04:43 +0000 Subject: [PATCH 14/18] metag: ptrace: Implement NT_METAG_TLS Implement functionality to get the TLS pointer for the metag architecture using regsets. This provides multi-threaded debug support for GDB. Signed-off-by: Paul Clothier Signed-off-by: James Hogan --- arch/metag/kernel/ptrace.c | 34 ++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 2 files changed, 35 insertions(+) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 47a8828615a5..7563628822bd 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -288,10 +288,36 @@ static int metag_rp_state_set(struct task_struct *target, return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf); } +static int metag_tls_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + void __user *tls = target->thread.tls_ptr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); +} + +static int metag_tls_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + void __user *tls; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + if (ret) + return ret; + + target->thread.tls_ptr = tls; + return ret; +} + enum metag_regset { REGSET_GENERAL, REGSET_CBUF, REGSET_READPIPE, + REGSET_TLS, }; static const struct user_regset metag_regsets[] = { @@ -319,6 +345,14 @@ static const struct user_regset metag_regsets[] = { .get = metag_rp_state_get, .set = metag_rp_state_set, }, + [REGSET_TLS] = { + .core_note_type = NT_METAG_TLS, + .n = 1, + .size = sizeof(void *), + .align = sizeof(void *), + .get = metag_tls_get, + .set = metag_tls_set, + }, }; static const struct user_regset_view user_metag_view = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 8072d352b98f..ef6103bf1f9b 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -397,6 +397,7 @@ typedef struct elf64_shdr { #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ #define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ +#define NT_METAG_TLS 0x502 /* Metag TLS pointer */ /* Note header in a PT_NOTE section */ From e605ff8c1a785f49801ad903846f62e609f954f7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 26 Mar 2013 14:10:42 +0000 Subject: [PATCH 15/18] metag: export _metag_da_present and cpu_2_hwthread_id Export the symbols _metag_da_present and 
cpu_2_hwthread_id to modules (GPL only) to allow the imgdafs file system to be built as a module. Signed-off-by: James Hogan --- arch/metag/kernel/da.c | 2 ++ arch/metag/kernel/setup.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/metag/kernel/da.c b/arch/metag/kernel/da.c index 52aabb658fde..a35dbed6fffa 100644 --- a/arch/metag/kernel/da.c +++ b/arch/metag/kernel/da.c @@ -5,12 +5,14 @@ */ +#include #include #include #include #include bool _metag_da_present; +EXPORT_SYMBOL_GPL(_metag_da_present); int __init metag_da_probe(void) { diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c index 879246170aec..4f5726f1a55b 100644 --- a/arch/metag/kernel/setup.c +++ b/arch/metag/kernel/setup.c @@ -124,6 +124,7 @@ struct machine_desc *machine_desc __initdata; u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_HWTHREAD_ID }; +EXPORT_SYMBOL_GPL(cpu_2_hwthread_id); /* * Map a hardware thread ID to a Linux CPU number From 82bbb8325a232e29e1c2ae09200384ddb989c0d5 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 26 Mar 2013 14:40:58 +0000 Subject: [PATCH 16/18] metag: add exported for extended context handling Add an exported header file containing the definitions of some bits which can be set in D0.8 to indicate to the kernel that certain DSP state should be preserved. The definitions have the same names and values as the ones in the kernel-internal , to make it easier for DSP assembly code to be compatible between Linux and non-Linux operating systems. Signed-off-by: James Hogan --- arch/metag/include/uapi/asm/Kbuild | 1 + arch/metag/include/uapi/asm/ech.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 arch/metag/include/uapi/asm/ech.h diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild index 876c71f866de..84e09feb4d54 100644 --- a/arch/metag/include/uapi/asm/Kbuild +++ b/arch/metag/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm header-y += byteorder.h +header-y += ech.h header-y += ptrace.h header-y += resource.h header-y += sigcontext.h diff --git a/arch/metag/include/uapi/asm/ech.h b/arch/metag/include/uapi/asm/ech.h new file mode 100644 index 000000000000..ac94d1cf9be4 --- /dev/null +++ b/arch/metag/include/uapi/asm/ech.h @@ -0,0 +1,15 @@ +#ifndef _UAPI_METAG_ECH_H +#define _UAPI_METAG_ECH_H + +/* + * These bits can be set in the top half of the D0.8 register when DSP context + * switching is enabled, in order to support partial DSP context save/restore. + */ + +#define TBICTX_XEXT_BIT 0x1000 /* Enable extended context save */ +#define TBICTX_XTDP_BIT 0x0800 /* DSP accumulators/RAM/templates */ +#define TBICTX_XHL2_BIT 0x0400 /* Hardware loops */ +#define TBICTX_XAXX_BIT 0x0200 /* Extended AX registers (A*.4-7) */ +#define TBICTX_XDX8_BIT 0x0100 /* Extended DX registers (D*.8-15) */ + +#endif /* _UAPI_METAG_ECH_H */ From 0b4184c26b18ecbc0ec0657f514b087c3a6216fb Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 17 Apr 2013 10:59:22 +0100 Subject: [PATCH 17/18] metag: avoid unnecessary builtin dtb rebuilds The builtin .dtb.S intermediate file needs to be marked with .SECONDARY so that it isn't automatically deleted (which causes it to be regenerated on every build). Also add *.dtb.S to clean-files so it gets cleaned up by make clean. Similarly, if the specified builtin dtb isn't already in dtb-y (e.g. 
imported into the tree and specified in CONFIG_METAG_BUILTIN_DTB_NAME) it too will be treated as an intermediate and deleted automatically (again causing it to be regenerated on every build), so add it to dtb-y so it gets added to targets and the dtbs target. Signed-off-by: James Hogan Cc: Vineet Gupta Cc: Grant Likely Cc: Michal Marek Cc: Sam Ravnborg Reviewed-by: Stephen Warren --- arch/metag/boot/dts/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile index e0b5afd8bde8..dbd95217733a 100644 --- a/arch/metag/boot/dts/Makefile +++ b/arch/metag/boot/dts/Makefile @@ -4,13 +4,17 @@ dtb-y += skeleton.dtb builtindtb-y := skeleton ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"") - builtindtb-y := $(CONFIG_METAG_BUILTIN_DTB_NAME) + builtindtb-y := $(patsubst "%",%,$(CONFIG_METAG_BUILTIN_DTB_NAME)) endif -obj-$(CONFIG_METAG_BUILTIN_DTB) += $(patsubst "%",%,$(builtindtb-y)).dtb.o + +dtb-$(CONFIG_METAG_BUILTIN_DTB) += $(builtindtb-y).dtb +obj-$(CONFIG_METAG_BUILTIN_DTB) += $(builtindtb-y).dtb.o targets += dtbs targets += $(dtb-y) +.SECONDARY: $(obj)/$(builtindtb-y).dtb.S + dtbs: $(addprefix $(obj)/, $(dtb-y)) -clean-files += *.dtb +clean-files += *.dtb *.dtb.S From 164c013858a2e89b450cd8021a8be896f9e05697 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Mar 2013 15:19:53 +0000 Subject: [PATCH 18/18] metag: defconfigs: increase log buffer 8KiB => 128KiB The Meta defconfigs set the log buffer size to just 8KiB, but with the fairly recent conversion of the kernel log buffer into a structured binary format, log messages appear to consume more space in the buffer, and in some cases it's not big enough to store the entire boot log. Therefore switch all the defconfigs to use the default size of 128KiB. Signed-off-by: James Hogan --- arch/metag/configs/meta1_defconfig | 1 - arch/metag/configs/meta2_defconfig | 1 - arch/metag/configs/meta2_smp_defconfig | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/metag/configs/meta1_defconfig b/arch/metag/configs/meta1_defconfig index c35a75e8ecfe..01cd67e4403d 100644 --- a/arch/metag/configs/meta1_defconfig +++ b/arch/metag/configs/meta1_defconfig @@ -1,6 +1,5 @@ # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set -CONFIG_LOG_BUF_SHIFT=13 CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_KALLSYMS_ALL=y diff --git a/arch/metag/configs/meta2_defconfig b/arch/metag/configs/meta2_defconfig index fb3148410183..643392ba7ed5 100644 --- a/arch/metag/configs/meta2_defconfig +++ b/arch/metag/configs/meta2_defconfig @@ -1,7 +1,6 @@ # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=13 CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_KALLSYMS_ALL=y diff --git a/arch/metag/configs/meta2_smp_defconfig b/arch/metag/configs/meta2_smp_defconfig index 6c7b777ac276..f3306737da20 100644 --- a/arch/metag/configs/meta2_smp_defconfig +++ b/arch/metag/configs/meta2_smp_defconfig @@ -1,7 +1,6 @@ # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=13 CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_KALLSYMS_ALL=y
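
For reference, CONFIG_LOG_BUF_SHIFT is the log2 of the log buffer size in bytes, so the old value of 13 gave 8KiB while the 128KiB default quoted in the final patch corresponds to a shift of 17; a quick sketch of the arithmetic (illustrative only, not part of any patch):

  #include <stdio.h>

  int main(void)
  {
  	/* CONFIG_LOG_BUF_SHIFT is a power-of-two exponent */
  	printf("shift 13 -> %u KiB\n", (1u << 13) / 1024);	/* 8 KiB */
  	printf("shift 17 -> %u KiB\n", (1u << 17) / 1024);	/* 128 KiB */
  	return 0;
  }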