linux/arch/arm/lib/xor-neon.c

/*
 * linux/arch/arm/lib/xor-neon.c
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/raid/xor.h>
#include <linux/module.h>

/* Declare the license of this module as GPL. */
MODULE_LICENSE("GPL");

/*
 * Build-time guard: stop compilation with an explanatory message unless the
 * compiler predefines __ARM_NEON__, i.e. unless NEON code generation has been
 * enabled on the command line.
 */
#ifndef __ARM_NEON__
#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
#endif

/*
 * Pull in the reference implementations while instructing GCC (through
 * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
 * NEON instructions.
 *
 * The optimize pragma is only issued for GCC 4.6 and later, and it is placed
 * ahead of the #include further down so that it is already in effect when the
 * included definitions are compiled.
 */
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#pragma GCC optimize "tree-vectorize"
#else
/*
 * While older versions of GCC do not generate incorrect code, they fail to
 * recognize the parallel nature of these functions, and emit plain ARM code,
 * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
 * The build is therefore allowed to continue, but with a warning.
 */
#warning This code requires at least version 4.6 of GCC
#endif

/*
 * Silence -Wunused-variable from this point on.
 * NOTE(review): presumably required because the generic header below defines
 * objects that this file never references -- confirm against
 * <asm-generic/xor.h>.
 */
#pragma GCC diagnostic ignored "-Wunused-variable"
#include <asm-generic/xor.h>

/*
 * Read-only XOR template published by this module under the name
 * "__inner_neon__".  The hooks for 2, 3, 4 and 5 buffers point at the
 * matching xor_8regs_N routines as compiled in this translation unit
 * (presumably the generic reference versions from <asm-generic/xor.h>).
 * The object is exported so that code outside this module can link to it.
 */
const struct xor_block_template xor_block_neon_inner = {
	.name = "__inner_neon__",
	.do_2 = xor_8regs_2,
	.do_3 = xor_8regs_3,
	.do_4 = xor_8regs_4,
	.do_5 = xor_8regs_5,
};
EXPORT_SYMBOL(xor_block_neon_inner);
ARM: crypto: add NEON accelerated XOR implementation Add a source file xor-neon.c (which is really just the reference C implementation passed through the GCC vectorizer) and hook it up to the XOR framework. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Nicolas Pitre <nico@linaro.org> 2013-05-17 18:51:23 +02:00			`/*`
			`* linux/arch/arm/lib/xor-neon.c`
			`*`
			`* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License version 2 as`
			`* published by the Free Software Foundation.`
			`*/`

			`#include <linux/raid/xor.h>`
ARM: 7835/2: fix modular build of xor_blocks() with NEON enabled Commit 0195659 introduced a NEON accelerated version of the xor_blocks() function, but it needs the changes in this patch to allow it to be built as a module rather than statically into the kernel. This patch creates a separate module xor-neon.ko which exports the NEON inner xor_blocks() functions depended upon by the regular xor.ko if it is built with CONFIG_KERNEL_MODE_NEON=y Reported-by: Josh Boyer <jwboyer@fedoraproject.org> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> 2013-09-09 16:08:38 +02:00			`#include <linux/module.h>`

			`MODULE_LICENSE("GPL");`
ARM: crypto: add NEON accelerated XOR implementation Add a source file xor-neon.c (which is really just the reference C implementation passed through the GCC vectorizer) and hook it up to the XOR framework. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Nicolas Pitre <nico@linaro.org> 2013-05-17 18:51:23 +02:00
			`#ifndef __ARM_NEON__`
			`#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'`
			`#endif`

			`/*`
			`* Pull in the reference implementations while instructing GCC (through`
			`* -ftree-vectorize) to attempt to exploit implicit parallelism and emit`
			`* NEON instructions.`
			`*/`
			`#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)`
			`#pragma GCC optimize "tree-vectorize"`
			`#else`
			`/*`
			`* While older versions of GCC do not generate incorrect code, they fail to`
			`* recognize the parallel nature of these functions, and emit plain ARM code,`
			`* which is known to be slower than the optimized ARM code in asm-arm/xor.h.`
			`*/`
			`#warning This code requires at least version 4.6 of GCC`
			`#endif`

			`#pragma GCC diagnostic ignored "-Wunused-variable"`
			`#include <asm-generic/xor.h>`

			`struct xor_block_template const xor_block_neon_inner = {`
			`.name = "__inner_neon__",`
			`.do_2 = xor_8regs_2,`
			`.do_3 = xor_8regs_3,`
			`.do_4 = xor_8regs_4,`
			`.do_5 = xor_8regs_5,`
			`};`
ARM: 7835/2: fix modular build of xor_blocks() with NEON enabled Commit 0195659 introduced a NEON accelerated version of the xor_blocks() function, but it needs the changes in this patch to allow it to be built as a module rather than statically into the kernel. This patch creates a separate module xor-neon.ko which exports the NEON inner xor_blocks() functions depended upon by the regular xor.ko if it is built with CONFIG_KERNEL_MODE_NEON=y Reported-by: Josh Boyer <jwboyer@fedoraproject.org> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> 2013-09-09 16:08:38 +02:00			`EXPORT_SYMBOL(xor_block_neon_inner);`