diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index bb6bff51ce48..13929771bee7 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -61,7 +61,7 @@ static unsigned int spu_cycle_reset; #define NUM_THREADS 2 /* number of physical threads in * physical processor */ -#define NUM_TRACE_BUS_WORDS 4 +#define NUM_DEBUG_BUS_WORDS 4 #define NUM_INPUT_BUS_WORDS 2 #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ @@ -169,7 +169,6 @@ static DEFINE_SPINLOCK(virt_cntr_lock); static u32 ctr_enabled; -static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; /* @@ -298,7 +297,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) p->signal_group = event / 100; p->bus_word = bus_word; - p->sub_unit = (unit_mask & 0x0000f000) >> 12; + p->sub_unit = GET_SUB_UNIT(unit_mask); pm_regs.pm07_cntrl[ctr] = 0; pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); @@ -334,16 +333,16 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) p->bit = signal_bit; } - for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) { + for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) { if (bus_word & (1 << i)) { pm_regs.debug_bus_control |= - (bus_type << (31 - (2 * i) + 1)); + (bus_type << (30 - (2 * i))); for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { if (input_bus[j] == 0xff) { input_bus[j] = i; pm_regs.group_control |= - (i << (31 - i)); + (i << (30 - (2 * j))); break; } @@ -450,6 +449,12 @@ static void cell_virtual_cntr(unsigned long data) hdw_thread = 1 ^ hdw_thread; next_hdw_thread = hdw_thread; + pm_regs.group_control = 0; + pm_regs.debug_bus_control = 0; + + for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) + input_bus[i] = 0xff; + /* * There are some per thread events. 
Must do the * set event, for the thread that is being started @@ -619,9 +624,6 @@ static int cell_reg_setup(struct op_counter_config *ctr, pmc_cntrl[1][i].vcntr = i; } - for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) - trace_bus[i] = 0xff; - for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) input_bus[i] = 0xff; diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c index 13d5a87f13b1..ec7c8f45a215 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -21,8 +21,9 @@ */ #include +#include + #include -#include #include #include #include "cbe_cpufreq.h" diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c index 6a2c1b0a9a94..69288f653144 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -23,7 +23,8 @@ #include #include #include -#include +#include + #include #include #include diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 16a9b07e7b0c..a839c6cf3447 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -9,13 +9,13 @@ #include #include #include +#include +#include #include #include #include #include -#include -#include #include /* diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index b465494cc24c..39fa2149fd02 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -26,13 +26,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c index 1ed303678887..99d688e88cbe 100644 --- a/arch/powerpc/platforms/cell/pmu.c +++ b/arch/powerpc/platforms/cell/pmu.c @@ -213,7 +213,7 @@ u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) break; case pm_interval: - 
READ_SHADOW_REG(val, pm_interval); + READ_MMIO_UPPER32(val, pm_interval); break; case pm_start_stop: diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 6a56b6474f52..7f42b7d0adcb 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -51,7 +52,6 @@ #include #include #include -#include #include #include "interrupt.h" diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index c83c3e3f5178..f73263ba9841 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,13 @@ EXPORT_SYMBOL_GPL(spu_priv1_ops); struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; EXPORT_SYMBOL_GPL(cbe_spu_info); +/* + * The spufs fault-handling code needs to call force_sig_info to raise signals + * on DMA errors. Export it here to avoid general kernel-wide access to this + * function + */ +EXPORT_SYMBOL_GPL(force_sig_info); + /* * Protects cbe_spu_info and spu->number. 
*/ @@ -66,6 +74,10 @@ static LIST_HEAD(spu_full_list); static DEFINE_SPINLOCK(spu_full_list_lock); static DEFINE_MUTEX(spu_full_list_mutex); +struct spu_slb { + u64 esid, vsid; +}; + void spu_invalidate_slbs(struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -114,6 +126,12 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm) } EXPORT_SYMBOL_GPL(spu_associate_mm); +int spu_64k_pages_available(void) +{ + return mmu_psize_defs[MMU_PAGE_64K].shift != 0; +} +EXPORT_SYMBOL_GPL(spu_64k_pages_available); + static int __spu_trap_invalid_dma(struct spu *spu) { pr_debug("%s\n", __FUNCTION__); @@ -143,11 +161,22 @@ static void spu_restart_dma(struct spu *spu) out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); } -static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; + + pr_debug("%s: adding SLB[%d] 0x%016lx 0x%016lx\n", + __func__, slbe, slb->vsid, slb->esid); + + out_be64(&priv2->slb_index_W, slbe); + out_be64(&priv2->slb_vsid_RW, slb->vsid); + out_be64(&priv2->slb_esid_RW, slb->esid); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ struct mm_struct *mm = spu->mm; - u64 esid, vsid, llp; + struct spu_slb slb; int psize; pr_debug("%s\n", __FUNCTION__); @@ -159,7 +188,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) printk("%s: invalid access during switch!\n", __func__); return 1; } - esid = (ea & ESID_MASK) | SLB_ESID_V; + slb.esid = (ea & ESID_MASK) | SLB_ESID_V; switch(REGION_ID(ea)) { case USER_REGION_ID: @@ -168,21 +197,21 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) #else psize = mm->context.user_psize; #endif - vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_USER; + slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_USER; break; case 
VMALLOC_REGION_ID: if (ea < VMALLOC_END) psize = mmu_vmalloc_psize; else psize = mmu_io_psize; - vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_KERNEL; + slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; break; case KERNEL_REGION_ID: psize = mmu_linear_psize; - vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_KERNEL; + slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; break; default: /* Future: support kernel segments so that drivers @@ -191,11 +220,9 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) pr_debug("invalid region access at %016lx\n", ea); return 1; } - llp = mmu_psize_defs[psize].sllp; + slb.vsid |= mmu_psize_defs[psize].sllp; - out_be64(&priv2->slb_index_W, spu->slb_replace); - out_be64(&priv2->slb_vsid_RW, vsid | llp); - out_be64(&priv2->slb_esid_RW, esid); + spu_load_slb(spu, spu->slb_replace, &slb); spu->slb_replace++; if (spu->slb_replace >= 8) @@ -232,6 +259,74 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 0; } +static void __spu_kernel_slb(void *addr, struct spu_slb *slb) +{ + unsigned long ea = (unsigned long)addr; + u64 llp; + + if (REGION_ID(ea) == KERNEL_REGION_ID) + llp = mmu_psize_defs[mmu_linear_psize].sllp; + else + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + + slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; +} + +/** + * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the + * address @new_addr is present. 
+ */ +static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, + void *new_addr) +{ + unsigned long ea = (unsigned long)new_addr; + int i; + + for (i = 0; i < nr_slbs; i++) + if (!((slbs[i].esid ^ ea) & ESID_MASK)) + return 1; + + return 0; +} + +/** + * Setup the SPU kernel SLBs, in preparation for a context save/restore. We + * need to map both the context save area, and the save/restore code. + * + * Because the lscsa and code may cross segment boundaries, we check to see + * if mappings are required for the start and end of each range. We currently + * assume that the mappings are smaller than one segment - if not, something + * is seriously wrong. + */ +void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, + void *code, int code_size) +{ + struct spu_slb slbs[4]; + int i, nr_slbs = 0; + /* start and end addresses of both mappings */ + void *addrs[] = { + lscsa, (void *)lscsa + sizeof(*lscsa) - 1, + code, code + code_size - 1 + }; + + /* check the set of addresses, and create a new entry in the slbs array + * if there isn't already a SLB for that address */ + for (i = 0; i < ARRAY_SIZE(addrs); i++) { + if (__slb_present(slbs, nr_slbs, addrs[i])) + continue; + + __spu_kernel_slb(addrs[i], &slbs[nr_slbs]); + nr_slbs++; + } + + /* Add the set of SLBs */ + for (i = 0; i < nr_slbs; i++) + spu_load_slb(spu, i, &slbs[i]); +} +EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs); + static irqreturn_t spu_irq_class_0(int irq, void *data) { @@ -479,13 +574,27 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); int spu_add_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; + int rc = 0; mutex_lock(&spu_full_list_mutex); - list_for_each_entry(spu, &spu_full_list, full_list) - sysfs_create_group(&spu->sysdev.kobj, attrs); + list_for_each_entry(spu, &spu_full_list, full_list) { + rc = sysfs_create_group(&spu->sysdev.kobj, attrs); + + /* we're in trouble here, but try unwinding anyway */ + if (rc) { + printk(KERN_ERR "%s: can't create sysfs group '%s'\n", 
+ __func__, attrs->name); + + list_for_each_entry_continue_reverse(spu, + &spu_full_list, full_list) + sysfs_remove_group(&spu->sysdev.kobj, attrs); + break; + } + } + mutex_unlock(&spu_full_list_mutex); - return 0; + return rc; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 1b010707488d..9979197ff409 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -345,7 +345,7 @@ static int __init of_create_spu(struct spu *spu, void *data) } ret = spu_map_interrupts_old(spu, spe); if (ret) { - printk(KERN_ERR "%s: could not map interrupts", + printk(KERN_ERR "%s: could not map interrupts\n", spu->name); goto out_unmap; } @@ -411,10 +411,15 @@ static void init_affinity_qs20_harcoded(void) static int of_has_vicinity(void) { - struct spu* spu; + struct device_node *dn; - spu = list_first_entry(&cbe_spu_info[0].spus, struct spu, cbe_list); - return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; + for_each_node_by_type(dn, "spe") { + if (of_find_property(dn, "vicinity", NULL)) { + of_node_put(dn); + return 1; + } + } + return 0; } static struct spu *devnode_spu(int cbe, struct device_node *dn) @@ -525,7 +530,7 @@ static int __init init_affinity(void) if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) init_affinity_qs20_harcoded(); else - printk("No affinity configuration found"); + printk("No affinity configuration found\n"); } return 0; diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index f4b3c052dabf..d606e575a204 100644 --- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -73,7 +73,7 @@ int spu_alloc_lscsa(struct spu_state *csa) int i, j, n_4k; /* Check availability of 64K pages */ - if (mmu_psize_defs[MMU_PAGE_64K].shift == 0) + if (!spu_64k_pages_available()) goto fail; 
csa->use_big_pages = 1; diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 3d64c81cc6e2..8cbc6574820f 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -691,35 +691,9 @@ static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu) out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); } -static inline void get_kernel_slb(u64 ea, u64 slb[2]) +static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu, + unsigned int *code, int code_size) { - u64 llp; - - if (REGION_ID(ea) == KERNEL_REGION_ID) - llp = mmu_psize_defs[mmu_linear_psize].sllp; - else - llp = mmu_psize_defs[mmu_virtual_psize].sllp; - slb[0] = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_KERNEL | llp; - slb[1] = (ea & ESID_MASK) | SLB_ESID_V; -} - -static inline void load_mfc_slb(struct spu *spu, u64 slb[2], int slbe) -{ - struct spu_priv2 __iomem *priv2 = spu->priv2; - - out_be64(&priv2->slb_index_W, slbe); - eieio(); - out_be64(&priv2->slb_vsid_RW, slb[0]); - out_be64(&priv2->slb_esid_RW, slb[1]); - eieio(); -} - -static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) -{ - u64 code_slb[2]; - u64 lscsa_slb[2]; - /* Save, Step 47: * Restore, Step 30. * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All @@ -735,11 +709,7 @@ static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) * translation is desired by OS environment). 
*/ spu_invalidate_slbs(spu); - get_kernel_slb((unsigned long)&spu_save_code[0], code_slb); - get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb); - load_mfc_slb(spu, code_slb, 0); - if ((lscsa_slb[0] != code_slb[0]) || (lscsa_slb[1] != code_slb[1])) - load_mfc_slb(spu, lscsa_slb, 1); + spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size); } static inline void set_switch_active(struct spu_state *csa, struct spu *spu) @@ -1866,7 +1836,8 @@ static void save_lscsa(struct spu_state *prev, struct spu *spu) */ resume_mfc_queue(prev, spu); /* Step 46. */ - setup_mfc_slbs(prev, spu); /* Step 47. */ + /* Step 47. */ + setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code)); set_switch_active(prev, spu); /* Step 48. */ enable_interrupts(prev, spu); /* Step 49. */ save_ls_16kb(prev, spu); /* Step 50. */ @@ -1971,7 +1942,8 @@ static void restore_lscsa(struct spu_state *next, struct spu *spu) setup_spu_status_part1(next, spu); /* Step 27. */ setup_spu_status_part2(next, spu); /* Step 28. */ restore_mfc_rag(next, spu); /* Step 29. */ - setup_mfc_slbs(next, spu); /* Step 30. */ + /* Step 30. */ + setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code)); set_spu_npc(next, spu); /* Step 31. */ set_signot1(next, spu); /* Step 32. */ set_signot2(next, spu); /* Step 33. 
*/ diff --git a/arch/powerpc/platforms/celleb/iommu.c b/arch/powerpc/platforms/celleb/iommu.c index 287450a07c41..61df97f4e1a6 100644 --- a/arch/powerpc/platforms/celleb/iommu.c +++ b/arch/powerpc/platforms/celleb/iommu.c @@ -22,8 +22,8 @@ #include #include #include +#include -#include #include #include "beat_wrapper.h" diff --git a/arch/powerpc/platforms/celleb/setup.c b/arch/powerpc/platforms/celleb/setup.c index 5a3f73478f4b..8b03a1bdc79f 100644 --- a/arch/powerpc/platforms/celleb/setup.c +++ b/arch/powerpc/platforms/celleb/setup.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,6 @@ #include #include #include -#include #include #include diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index b1accce77bb5..314aad357d98 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -104,6 +104,7 @@ struct spu_context; struct spu_runqueue; +struct spu_lscsa; struct device_node; enum spu_utilization_state { @@ -200,6 +201,9 @@ int spu_irq_class_0_bottom(struct spu *spu); int spu_irq_class_1_bottom(struct spu *spu); void spu_irq_setaffinity(struct spu *spu, int cpu); +void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, + void *code, int code_size); + #ifdef CONFIG_KEXEC void crash_register_spus(struct list_head *list); #else @@ -210,6 +214,7 @@ static inline void crash_register_spus(struct list_head *list) extern void spu_invalidate_slbs(struct spu *spu); extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm); +int spu_64k_pages_available(void); /* Calls from the memory management to the SPU */ struct mm_struct;