fast_hash: avoid indirect function calls

By default the arch_fast_hash hashing function pointers are initialized
to jhash(2). If during boot-up a CPU with SSE4.2 is detected they get
updated to the CRC32 ones. This dispatching scheme incurs a function
pointer lookup and indirect call for every hashing operation.

For example, rhashtable, as a user of arch_fast_hash, also stores pointers
to hashing functions in its own structure, causing two indirect branches
per hashing operation.

Using alternative_call we can eliminate one of those indirect branches.

Acked-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Hannes Frederic Sowa 2014-11-05 00:23:04 +01:00 committed by David S. Miller
parent 2c99cd914d
commit e5a2c89995
6 changed files with 98 additions and 93 deletions

View File

@ -1,7 +1,48 @@
#ifndef _ASM_X86_HASH_H #ifndef __ASM_X86_HASH_H
#define _ASM_X86_HASH_H #define __ASM_X86_HASH_H
struct fast_hash_ops; #include <linux/cpufeature.h>
extern void setup_arch_fast_hash(struct fast_hash_ops *ops); #include <asm/alternative.h>
#endif /* _ASM_X86_HASH_H */ u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed);
u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed);
/*
* non-inline versions of jhash so gcc does not need to generate
* duplicate code in every object file
*/
u32 __jhash(const void *data, u32 len, u32 seed);
u32 __jhash2(const u32 *data, u32 len, u32 seed);
/*
* for documentation of these functions please look into
* <include/asm-generic/hash.h>
*/
/*
 * arch_fast_hash - hash a buffer of arbitrary size
 * @data: buffer to hash
 * @len:  length of the buffer in bytes
 * @seed: start seed
 *
 * Emits a direct call to __jhash() which alternative_call() replaces with
 * a call to __intel_crc4_2_hash() on CPUs that have X86_FEATURE_XMM4_2.
 *
 * The call is issued from inline asm, so the compiler has no idea that a C
 * function runs here.  Everything the callee may change must therefore be
 * declared explicitly:
 *  - the argument registers are tied to outputs, because an asm statement
 *    must not modify a plain input operand;
 *  - the remaining call-clobbered registers and the flags are listed as
 *    clobbers;
 *  - "memory" forces pending stores to the buffer to be completed before
 *    the callee reads it.
 *
 * The 32-bit variant keeps the register assignment of the original operand
 * list (data in eax, len in edx, seed in ecx).
 *
 * The #ifdef selects between two complete statements: a preprocessing
 * directive inside a macro argument list is undefined behaviour in C.
 *
 * NOTE(review): relies on ASM_OUTPUT2() being provided by
 * <asm/alternative.h> and on alternative_call() appending its trailing
 * arguments as the asm input list, so that a clobber list may follow the
 * last input - confirm against the macro definitions.
 *
 * Returns the 32-bit hash.
 */
static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
{
	u32 hash;

#ifdef CONFIG_X86_64
	alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2,
			 ASM_OUTPUT2("=a" (hash), "=D" (data),
				     "=S" (len), "=d" (seed)),
			 "1" (data), "2" (len), "3" (seed)
			 : "rcx", "r8", "r9", "r10", "r11", "cc", "memory");
#else
	alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2,
			 ASM_OUTPUT2("=a" (hash), "=d" (len), "=c" (seed)),
			 "0" (data), "1" (len), "2" (seed)
			 : "cc", "memory");
#endif
	return hash;
}
/*
 * arch_fast_hash2 - hash a buffer made up of 32-bit words
 * @data: buffer to hash
 * @len:  number of 32-bit words
 * @seed: start seed
 *
 * Emits a direct call to __jhash2() which alternative_call() replaces with
 * a call to __intel_crc4_2_hash2() on CPUs that have X86_FEATURE_XMM4_2.
 *
 * The call is issued from inline asm, so the compiler has no idea that a C
 * function runs here.  Everything the callee may change must therefore be
 * declared explicitly:
 *  - the argument registers are tied to outputs, because an asm statement
 *    must not modify a plain input operand;
 *  - the remaining call-clobbered registers and the flags are listed as
 *    clobbers;
 *  - "memory" forces pending stores to the buffer to be completed before
 *    the callee reads it.
 *
 * The 32-bit variant keeps the register assignment of the original operand
 * list (data in eax, len in edx, seed in ecx).
 *
 * The #ifdef selects between two complete statements: a preprocessing
 * directive inside a macro argument list is undefined behaviour in C.
 *
 * NOTE(review): relies on ASM_OUTPUT2() being provided by
 * <asm/alternative.h> and on alternative_call() appending its trailing
 * arguments as the asm input list, so that a clobber list may follow the
 * last input - confirm against the macro definitions.
 *
 * Returns the 32-bit hash.
 */
static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
{
	u32 hash;

#ifdef CONFIG_X86_64
	alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2,
			 ASM_OUTPUT2("=a" (hash), "=D" (data),
				     "=S" (len), "=d" (seed)),
			 "1" (data), "2" (len), "3" (seed)
			 : "rcx", "r8", "r9", "r10", "r11", "cc", "memory");
#else
	alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2,
			 ASM_OUTPUT2("=a" (hash), "=d" (len), "=c" (seed)),
			 "0" (data), "1" (len), "2" (seed)
			 : "cc", "memory");
#endif
	return hash;
}
#endif /* __ASM_X86_HASH_H */

View File

@ -31,13 +31,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <linux/hash.h>
#include <linux/init.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/hash.h> #include <asm/hash.h>
#include <linux/hash.h>
#include <linux/jhash.h>
static inline u32 crc32_u32(u32 crc, u32 val) static inline u32 crc32_u32(u32 crc, u32 val)
{ {
#ifdef CONFIG_AS_CRC32 #ifdef CONFIG_AS_CRC32
@ -48,7 +48,7 @@ static inline u32 crc32_u32(u32 crc, u32 val)
return crc; return crc;
} }
static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed)
{ {
const u32 *p32 = (const u32 *) data; const u32 *p32 = (const u32 *) data;
u32 i, tmp = 0; u32 i, tmp = 0;
@ -71,22 +71,27 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
return seed; return seed;
} }
EXPORT_SYMBOL(__intel_crc4_2_hash);
static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
{ {
const u32 *p32 = (const u32 *) data;
u32 i; u32 i;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
seed = crc32_u32(seed, *p32++); seed = crc32_u32(seed, *data++);
return seed; return seed;
} }
EXPORT_SYMBOL(__intel_crc4_2_hash2);
void __init setup_arch_fast_hash(struct fast_hash_ops *ops) u32 __jhash(const void *data, u32 len, u32 seed)
{ {
if (cpu_has_xmm4_2) { return jhash(data, len, seed);
ops->hash = intel_crc4_2_hash;
ops->hash2 = intel_crc4_2_hash2;
}
} }
EXPORT_SYMBOL(__jhash);
/*
 * Out-of-line wrapper around jhash2(): forwards its three arguments
 * unchanged and hands back whatever jhash2() computed.  Exported so the
 * symbol is available as a direct call target.
 */
u32 __jhash2(const u32 *data, u32 len, u32 seed)
{
	const u32 hash = jhash2(data, len, seed);

	return hash;
}
EXPORT_SYMBOL(__jhash2);

View File

@ -1,9 +1,41 @@
#ifndef __ASM_GENERIC_HASH_H #ifndef __ASM_GENERIC_HASH_H
#define __ASM_GENERIC_HASH_H #define __ASM_GENERIC_HASH_H
struct fast_hash_ops; #include <linux/jhash.h>
static inline void setup_arch_fast_hash(struct fast_hash_ops *ops)
/**
* arch_fast_hash - Calculates a hash over a given buffer that can have
* arbitrary size. This function will eventually use an
* architecture-optimized hashing implementation if
* available, and trades off distribution for speed.
*
* @data: buffer to hash
* @len: length of buffer in bytes
* @seed: start seed
*
* Returns 32bit hash.
*/
static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
{ {
return jhash(data, len, seed);
}
/**
* arch_fast_hash2 - Calculates a hash over a given buffer that has a
* size that is a multiple of 32bit words. This
* function will eventually use an architecture-
* optimized hashing implementation if available,
* and trades off distribution for speed.
*
* @data: buffer to hash (must be 32bit padded)
* @len: number of 32bit words
* @seed: start seed
*
* Returns 32bit hash.
*/
static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
{
return jhash2(data, len, seed);
} }
#endif /* __ASM_GENERIC_HASH_H */ #endif /* __ASM_GENERIC_HASH_H */

View File

@ -84,38 +84,4 @@ static inline u32 hash32_ptr(const void *ptr)
return (u32)val; return (u32)val;
} }
/*
 * Pair of hashing callbacks. Both take a buffer, a length and a start seed
 * and return a 32-bit hash.
 */
struct fast_hash_ops {
/* hashes a buffer through an untyped pointer */
u32 (*hash)(const void *data, u32 len, u32 seed);
/* hashes a buffer presented as an array of u32 */
u32 (*hash2)(const u32 *data, u32 len, u32 seed);
};
/**
* arch_fast_hash - Calculates a hash over a given buffer that can have
* arbitrary size. This function will eventually use an
* architecture-optimized hashing implementation if
* available, and trades off distribution for speed.
*
* @data: buffer to hash
* @len: length of buffer in bytes
* @seed: start seed
*
* Returns 32bit hash.
*/
extern u32 arch_fast_hash(const void *data, u32 len, u32 seed);
/**
* arch_fast_hash2 - Calculates a hash over a given buffer that has a
* size that is a multiple of 32bit words. This
* function will eventually use an architecture-
* optimized hashing implementation if available,
* and trades off distribution for speed.
*
* @data: buffer to hash (must be 32bit padded)
* @len: number of 32bit words
* @seed: start seed
*
* Returns 32bit hash.
*/
extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed);
#endif /* _LINUX_HASH_H */ #endif /* _LINUX_HASH_H */

View File

@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
percpu-refcount.o percpu_ida.o hash.o rhashtable.o percpu-refcount.o percpu_ida.o rhashtable.o
obj-y += string_helpers.o obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += kstrtox.o obj-y += kstrtox.o

View File

@ -1,39 +0,0 @@
/* General purpose hashing library
*
* This is the start of a kernel hashing library, which can be extended
* with further algorithms in the future. arch_fast_hash{2,}() will
* eventually resolve to an architecture-optimized implementation.
*
* Copyright 2013 Francesco Fusco <ffusco@redhat.com>
* Copyright 2013 Daniel Borkmann <dborkman@redhat.com>
* Copyright 2013 Thomas Graf <tgraf@redhat.com>
* Licensed under the GNU General Public License, version 2.0 (GPLv2)
*/
#include <linux/jhash.h>
#include <linux/hash.h>
#include <linux/cache.h>
/*
 * Hash callbacks used by arch_fast_hash() and arch_fast_hash2().  They start
 * out as jhash/jhash2; hashlib_init() passes this structure to
 * setup_arch_fast_hash(), which receives a non-const pointer to it.
 */
static struct fast_hash_ops arch_hash_ops __read_mostly = {
.hash = jhash,
.hash2 = jhash2,
};
/*
 * arch_fast_hash - hash a buffer through the currently installed
 * arch_hash_ops.hash callback and return its 32-bit result.
 */
u32 arch_fast_hash(const void *data, u32 len, u32 seed)
{
	u32 digest;

	digest = arch_hash_ops.hash(data, len, seed);
	return digest;
}
EXPORT_SYMBOL_GPL(arch_fast_hash);
/*
 * arch_fast_hash2 - hash a u32 buffer through the currently installed
 * arch_hash_ops.hash2 callback and return its 32-bit result.
 */
u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
{
	u32 digest;

	digest = arch_hash_ops.hash2(data, len, seed);
	return digest;
}
EXPORT_SYMBOL_GPL(arch_fast_hash2);
/*
 * Passes arch_hash_ops to setup_arch_fast_hash(), which is given a non-const
 * pointer to it.  Always returns 0.  Registered with early_initcall().
 */
static int __init hashlib_init(void)
{
setup_arch_fast_hash(&arch_hash_ops);
return 0;
}
early_initcall(hashlib_init);