49a695ba72
Notable changes: - Support for 4PB user address space on 64-bit, opt-in via mmap(). - Removal of POWER4 support, which was accidentally broken in 2016 and no one noticed, and blocked use of some modern instructions. - Workarounds so that the hypervisor can enable Transactional Memory on Power9. - A series to disable the DAWR (Data Address Watchpoint Register) on Power9. - More information displayed in the meltdown/spectre_v1/v2 sysfs files. - A vpermxor (Power8 Altivec) implementation for the raid6 Q Syndrome. - A big series to make the allocation of our pacas (per cpu area), kernel page tables, and per-cpu stacks NUMA aware when using the Radix MMU on Power9. And as usual many fixes, reworks and cleanups. Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy, Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual, Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook, Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe, Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring, Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira, Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras, Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun. 
-----BEGIN PGP SIGNATURE----- iQIwBAABCAAaBQJayKxDExxtcGVAZWxsZXJtYW4uaWQuYXUACgkQUevqPMjhpYAr JQ/6A9Xs4zHDn9OeT9esEIxciETqUlrP0Wp64c4JVC7EkG1E7xRDZ4Xb4m8R2nNt 9sPhtNO1yCtEk6kFQtPNB0N8v6pud4I6+aMcYnn+tP8mJRYQ4x9bYaF3Hw98IKmE Kd6TglmsUQvh2GpwPiF93KpzzWu1HB2kZzzqJcAMTMh7C79Qz00BjrTJltzXB2jx tJ+B4lVy8BeU8G5nDAzJEEwb5Ypkn8O40rS/lpAwVTYOBJ8Rbyq8Fj82FeREK9YO 4EGaEKPkC/FdzX7OJV3v2/nldCd8pzV471fAoGuBUhJiJBMBoBybcTHIdDex7LlL zMLV1mUtGo8iolRPhL8iCH+GGifZz2WzstYCozz7hgIraWtc/frq9rZp6q0LdH/K trk7UbPGlVb92ecWZVpZyEcsMzKrCgZqnAe9wRNh1uEKScEdzd/bmRaMhENUObRh Hili6AVvmSKExpy7k2sZP/oUMaeC15/xz8Lk7l8a/iCkYhNmPYh5iSXM5+UKpcRT FYOcO0o3DwXsN46Whow3nJ7TqAsDy9/ecPUG71JQi3ZrHnRrm8jxkn8MCG5pZ1Fi KvKDxlg6RiJo3DF9/fSOpJUokvMwqBS5dJo4eh5eiDy94aBTqmBKFecvPxQm7a0L l3uXCF/6JuXEvMukFjGBO4RiYhw8i+B2uKsh81XUh7HKrgE= =HAB1 -----END PGP SIGNATURE----- Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc updates from Michael Ellerman: "Notable changes: - Support for 4PB user address space on 64-bit, opt-in via mmap(). - Removal of POWER4 support, which was accidentally broken in 2016 and no one noticed, and blocked use of some modern instructions. - Workarounds so that the hypervisor can enable Transactional Memory on Power9. - A series to disable the DAWR (Data Address Watchpoint Register) on Power9. - More information displayed in the meltdown/spectre_v1/v2 sysfs files. - A vpermxor (Power8 Altivec) implementation for the raid6 Q Syndrome. - A big series to make the allocation of our pacas (per cpu area), kernel page tables, and per-cpu stacks NUMA aware when using the Radix MMU on Power9. And as usual many fixes, reworks and cleanups. 
Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy, Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual, Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook, Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe, Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring, Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira, Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras, Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun" * tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (207 commits) powerpc/64s/idle: Fix restore of AMOR on POWER9 after deep sleep powerpc/64s: Fix POWER9 DD2.2 and above in cputable features powerpc/64s: Fix pkey support in dt_cpu_ftrs, add CPU_FTR_PKEY bit powerpc/64s: Fix dt_cpu_ftrs to have restore_cpu clear unwanted LPCR bits Revert "powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead" powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo} powerpc: io.h: move iomap.h include so that it can use readq/writeq defs cxl: Fix possible deadlock when processing page faults from cxllib powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it powerpc/mm/radix: Update command line parsing for disable_radix powerpc/mm/radix: Parse disable_radix commandline correctly. 
powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb powerpc/mm/radix: Update pte fragment count from 16 to 256 on radix powerpc/mm/keys: Update documentation and remove unnecessary check powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead powerpc/64s/idle: Consolidate power9_offline_stop()/power9_idle_stop() powerpc/powernv: Always stop secondaries before reboot/shutdown powerpc: hard disable irqs in smp_send_stop loop powerpc: use NMI IPI for smp_send_stop powerpc/powernv: Fix SMT4 forcing idle code ...
273 lines
6.3 KiB
C
273 lines
6.3 KiB
C
/* -*- linux-c -*- ------------------------------------------------------- *
|
|
*
|
|
* Copyright 2002 H. Peter Anvin - All Rights Reserved
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
|
|
* Boston MA 02111-1307, USA; either version 2 of the License, or
|
|
* (at your option) any later version; incorporated herein by reference.
|
|
*
|
|
* ----------------------------------------------------------------------- */
|
|
|
|
/*
|
|
* raid6/algos.c
|
|
*
|
|
* Algorithm list and algorithm selection for RAID-6
|
|
*/
|
|
|
|
#include <linux/raid/pq.h>
|
|
#ifndef __KERNEL__
|
|
#include <sys/mman.h>
|
|
#include <stdio.h>
|
|
#else
|
|
#include <linux/module.h>
|
|
#include <linux/gfp.h>
|
|
#if !RAID6_USE_EMPTY_ZERO_PAGE
|
|
/*
 * Page-sized buffer aligned to 256 bytes.  It has no initializer, so it is
 * zero-filled (in .bss so it's zeroed).  Only compiled for kernel builds
 * when RAID6_USE_EMPTY_ZERO_PAGE evaluates to false.
 */
|
|
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
|
|
EXPORT_SYMBOL(raid6_empty_zero_page);
|
|
#endif
|
|
#endif
|
|
|
|
/*
 * Currently selected syndrome implementation.  raid6_choose_gen() copies the
 * winning entry of raid6_algos[] into this object.
 */
struct raid6_calls raid6_call;
|
|
EXPORT_SYMBOL_GPL(raid6_call);
|
|
|
|
const struct raid6_calls * const raid6_algos[] = {
|
|
#if defined(__ia64__)
|
|
&raid6_intx16,
|
|
&raid6_intx32,
|
|
#endif
|
|
#if defined(__i386__) && !defined(__arch_um__)
|
|
&raid6_mmxx1,
|
|
&raid6_mmxx2,
|
|
&raid6_sse1x1,
|
|
&raid6_sse1x2,
|
|
&raid6_sse2x1,
|
|
&raid6_sse2x2,
|
|
#ifdef CONFIG_AS_AVX2
|
|
&raid6_avx2x1,
|
|
&raid6_avx2x2,
|
|
#endif
|
|
#ifdef CONFIG_AS_AVX512
|
|
&raid6_avx512x1,
|
|
&raid6_avx512x2,
|
|
#endif
|
|
#endif
|
|
#if defined(__x86_64__) && !defined(__arch_um__)
|
|
&raid6_sse2x1,
|
|
&raid6_sse2x2,
|
|
&raid6_sse2x4,
|
|
#ifdef CONFIG_AS_AVX2
|
|
&raid6_avx2x1,
|
|
&raid6_avx2x2,
|
|
&raid6_avx2x4,
|
|
#endif
|
|
#ifdef CONFIG_AS_AVX512
|
|
&raid6_avx512x1,
|
|
&raid6_avx512x2,
|
|
&raid6_avx512x4,
|
|
#endif
|
|
#endif
|
|
#ifdef CONFIG_ALTIVEC
|
|
&raid6_altivec1,
|
|
&raid6_altivec2,
|
|
&raid6_altivec4,
|
|
&raid6_altivec8,
|
|
&raid6_vpermxor1,
|
|
&raid6_vpermxor2,
|
|
&raid6_vpermxor4,
|
|
&raid6_vpermxor8,
|
|
#endif
|
|
#if defined(CONFIG_S390)
|
|
&raid6_s390vx8,
|
|
#endif
|
|
&raid6_intx1,
|
|
&raid6_intx2,
|
|
&raid6_intx4,
|
|
&raid6_intx8,
|
|
#ifdef CONFIG_KERNEL_MODE_NEON
|
|
&raid6_neonx1,
|
|
&raid6_neonx2,
|
|
&raid6_neonx4,
|
|
&raid6_neonx8,
|
|
#endif
|
|
NULL
|
|
};
|
|
|
|
/*
 * Recovery hook, set by raid6_choose_recov() from the data2 member of the
 * selected raid6_recov_calls entry.
 * NOTE(review): presumably recovers two failed data blocks - confirm
 * against the declaration in <linux/raid/pq.h>.
 */
void (*raid6_2data_recov)(int, size_t, int, int, void **);
|
|
EXPORT_SYMBOL_GPL(raid6_2data_recov);
|
|
|
|
/*
 * Recovery hook, set by raid6_choose_recov() from the datap member of the
 * selected raid6_recov_calls entry.
 * NOTE(review): presumably recovers one data block plus P - confirm
 * against the declaration in <linux/raid/pq.h>.
 */
void (*raid6_datap_recov)(int, size_t, int, void **);
|
|
EXPORT_SYMBOL_GPL(raid6_datap_recov);
|
|
|
|
const struct raid6_recov_calls *const raid6_recov_algos[] = {
|
|
#ifdef CONFIG_AS_AVX512
|
|
&raid6_recov_avx512,
|
|
#endif
|
|
#ifdef CONFIG_AS_AVX2
|
|
&raid6_recov_avx2,
|
|
#endif
|
|
#ifdef CONFIG_AS_SSSE3
|
|
&raid6_recov_ssse3,
|
|
#endif
|
|
#ifdef CONFIG_S390
|
|
&raid6_recov_s390xc,
|
|
#endif
|
|
#if defined(CONFIG_KERNEL_MODE_NEON)
|
|
&raid6_recov_neon,
|
|
#endif
|
|
&raid6_recov_intx1,
|
|
NULL
|
|
};
|
|
|
|
#ifdef __KERNEL__
|
|
/* log2 of the benchmark window, in jiffies: each timing loop runs for (1 << value) jiffies */
#define RAID6_TIME_JIFFIES_LG2 4
|
|
#else
|
|
/* Need more time to be stable in userspace */
|
|
#define RAID6_TIME_JIFFIES_LG2 9
|
|
/* Plain less-than comparison; only defined here for the non-kernel build */
#define time_before(x, y) ((x) < (y))
|
|
#endif
|
|
|
|
static inline const struct raid6_recov_calls *raid6_choose_recov(void)
|
|
{
|
|
const struct raid6_recov_calls *const *algo;
|
|
const struct raid6_recov_calls *best;
|
|
|
|
for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
|
|
if (!best || (*algo)->priority > best->priority)
|
|
if (!(*algo)->valid || (*algo)->valid())
|
|
best = *algo;
|
|
|
|
if (best) {
|
|
raid6_2data_recov = best->data2;
|
|
raid6_datap_recov = best->datap;
|
|
|
|
pr_info("raid6: using %s recovery algorithm\n", best->name);
|
|
} else
|
|
pr_err("raid6: Yikes! No recovery algorithm found!\n");
|
|
|
|
return best;
|
|
}
|
|
|
|
static inline const struct raid6_calls *raid6_choose_gen(
|
|
void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
|
|
{
|
|
unsigned long perf, bestgenperf, bestxorperf, j0, j1;
|
|
int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
|
|
const struct raid6_calls *const *algo;
|
|
const struct raid6_calls *best;
|
|
|
|
for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
|
|
if (!best || (*algo)->prefer >= best->prefer) {
|
|
if ((*algo)->valid && !(*algo)->valid())
|
|
continue;
|
|
|
|
perf = 0;
|
|
|
|
preempt_disable();
|
|
j0 = jiffies;
|
|
while ((j1 = jiffies) == j0)
|
|
cpu_relax();
|
|
while (time_before(jiffies,
|
|
j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
|
|
(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
|
|
perf++;
|
|
}
|
|
preempt_enable();
|
|
|
|
if (perf > bestgenperf) {
|
|
bestgenperf = perf;
|
|
best = *algo;
|
|
}
|
|
pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
|
|
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
|
|
|
|
if (!(*algo)->xor_syndrome)
|
|
continue;
|
|
|
|
perf = 0;
|
|
|
|
preempt_disable();
|
|
j0 = jiffies;
|
|
while ((j1 = jiffies) == j0)
|
|
cpu_relax();
|
|
while (time_before(jiffies,
|
|
j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
|
|
(*algo)->xor_syndrome(disks, start, stop,
|
|
PAGE_SIZE, *dptrs);
|
|
perf++;
|
|
}
|
|
preempt_enable();
|
|
|
|
if (best == *algo)
|
|
bestxorperf = perf;
|
|
|
|
pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
|
|
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
|
|
}
|
|
}
|
|
|
|
if (best) {
|
|
pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
|
|
best->name,
|
|
(bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
|
|
if (best->xor_syndrome)
|
|
pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
|
|
(bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
|
|
raid6_call = *best;
|
|
} else
|
|
pr_err("raid6: Yikes! No algorithm found!\n");
|
|
|
|
return best;
|
|
}
|
|
|
|
|
|
/* Try to pick the best algorithm */
|
|
/* This code uses the gfmul table as convenient data set to abuse */
|
|
|
|
int __init raid6_select_algo(void)
|
|
{
|
|
const int disks = (65536/PAGE_SIZE)+2;
|
|
|
|
const struct raid6_calls *gen_best;
|
|
const struct raid6_recov_calls *rec_best;
|
|
char *syndromes;
|
|
void *dptrs[(65536/PAGE_SIZE)+2];
|
|
int i;
|
|
|
|
for (i = 0; i < disks-2; i++)
|
|
dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
|
|
|
|
/* Normal code - use a 2-page allocation to avoid D$ conflict */
|
|
syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
|
|
|
|
if (!syndromes) {
|
|
pr_err("raid6: Yikes! No memory available.\n");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
dptrs[disks-2] = syndromes;
|
|
dptrs[disks-1] = syndromes + PAGE_SIZE;
|
|
|
|
/* select raid gen_syndrome function */
|
|
gen_best = raid6_choose_gen(&dptrs, disks);
|
|
|
|
/* select raid recover functions */
|
|
rec_best = raid6_choose_recov();
|
|
|
|
free_pages((unsigned long)syndromes, 1);
|
|
|
|
return gen_best && rec_best ? 0 : -EINVAL;
|
|
}
|
|
|
|
/* Module exit handler: intentionally does nothing. */
static void raid6_exit(void)
{
	/* nothing to undo */
}
|
|
|
|
/* Register raid6_select_algo() through subsys_initcall(). */
subsys_initcall(raid6_select_algo);
|
|
/* Register the (empty) exit handler. */
module_exit(raid6_exit);
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
|