c70f4abef0
Implements an x86_64 assembler driver for the Poly1305 authenticator. This single block variant holds the 130-bit integer in five 32-bit words, but uses SSE to do two multiplications/additions in parallel. When calling updates with small blocks, the overhead for kernel_fpu_begin()/kernel_fpu_end() negates the performance gain. We therefore use the poly1305-generic fallback for small updates. For large messages, throughput increases by ~5-10% compared to poly1305-generic: testing speed of poly1305 (poly1305-generic) test 0 ( 96 byte blocks, 16 bytes per update, 6 updates): 4080026 opers/sec, 391682496 bytes/sec test 1 ( 96 byte blocks, 32 bytes per update, 3 updates): 6221094 opers/sec, 597225024 bytes/sec test 2 ( 96 byte blocks, 96 bytes per update, 1 updates): 9609750 opers/sec, 922536057 bytes/sec test 3 ( 288 byte blocks, 16 bytes per update, 18 updates): 1459379 opers/sec, 420301267 bytes/sec test 4 ( 288 byte blocks, 32 bytes per update, 9 updates): 2115179 opers/sec, 609171609 bytes/sec test 5 ( 288 byte blocks, 288 bytes per update, 1 updates): 3729874 opers/sec, 1074203856 bytes/sec test 6 ( 1056 byte blocks, 32 bytes per update, 33 updates): 593000 opers/sec, 626208000 bytes/sec test 7 ( 1056 byte blocks, 1056 bytes per update, 1 updates): 1081536 opers/sec, 1142102332 bytes/sec test 8 ( 2080 byte blocks, 32 bytes per update, 65 updates): 302077 opers/sec, 628320576 bytes/sec test 9 ( 2080 byte blocks, 2080 bytes per update, 1 updates): 554384 opers/sec, 1153120176 bytes/sec test 10 ( 4128 byte blocks, 4128 bytes per update, 1 updates): 278715 opers/sec, 1150536345 bytes/sec test 11 ( 8224 byte blocks, 8224 bytes per update, 1 updates): 140202 opers/sec, 1153022070 bytes/sec testing speed of poly1305 (poly1305-simd) test 0 ( 96 byte blocks, 16 bytes per update, 6 updates): 3790063 opers/sec, 363846076 bytes/sec test 1 ( 96 byte blocks, 32 bytes per update, 3 updates): 5913378 opers/sec, 567684355 bytes/sec test 2 ( 96 byte blocks, 96 bytes per update, 1 
updates): 9352574 opers/sec, 897847104 bytes/sec test 3 ( 288 byte blocks, 16 bytes per update, 18 updates): 1362145 opers/sec, 392297990 bytes/sec test 4 ( 288 byte blocks, 32 bytes per update, 9 updates): 2007075 opers/sec, 578037628 bytes/sec test 5 ( 288 byte blocks, 288 bytes per update, 1 updates): 3709811 opers/sec, 1068425798 bytes/sec test 6 ( 1056 byte blocks, 32 bytes per update, 33 updates): 566272 opers/sec, 597984182 bytes/sec test 7 ( 1056 byte blocks, 1056 bytes per update, 1 updates): 1111657 opers/sec, 1173910108 bytes/sec test 8 ( 2080 byte blocks, 32 bytes per update, 65 updates): 288857 opers/sec, 600823808 bytes/sec test 9 ( 2080 byte blocks, 2080 bytes per update, 1 updates): 590746 opers/sec, 1228751888 bytes/sec test 10 ( 4128 byte blocks, 4128 bytes per update, 1 updates): 301825 opers/sec, 1245936902 bytes/sec test 11 ( 8224 byte blocks, 8224 bytes per update, 1 updates): 153075 opers/sec, 1258896201 bytes/sec Benchmark results from a Core i5-4670T. Signed-off-by: Martin Willi <martin@strongswan.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
||
---|---|---|
.. | ||
asymmetric_keys | ||
async_tx | ||
.gitignore | ||
842.c | ||
ablk_helper.c | ||
ablkcipher.c | ||
aead.c | ||
aes_generic.c | ||
af_alg.c | ||
ahash.c | ||
akcipher.c | ||
algapi.c | ||
algboss.c | ||
algif_aead.c | ||
algif_hash.c | ||
algif_rng.c | ||
algif_skcipher.c | ||
ansi_cprng.c | ||
anubis.c | ||
api.c | ||
arc4.c | ||
authenc.c | ||
authencesn.c | ||
blkcipher.c | ||
blowfish_common.c | ||
blowfish_generic.c | ||
camellia_generic.c | ||
cast5_generic.c | ||
cast6_generic.c | ||
cast_common.c | ||
cbc.c | ||
ccm.c | ||
chacha20_generic.c | ||
chacha20poly1305.c | ||
chainiv.c | ||
cipher.c | ||
cmac.c | ||
compress.c | ||
crc32.c | ||
crc32c_generic.c | ||
crct10dif_common.c | ||
crct10dif_generic.c | ||
cryptd.c | ||
crypto_null.c | ||
crypto_user.c | ||
crypto_wq.c | ||
ctr.c | ||
cts.c | ||
deflate.c | ||
des_generic.c | ||
drbg.c | ||
ecb.c | ||
echainiv.c | ||
eseqiv.c | ||
fcrypt.c | ||
fips.c | ||
gcm.c | ||
gf128mul.c | ||
ghash-generic.c | ||
hash_info.c | ||
hmac.c | ||
internal.h | ||
jitterentropy-kcapi.c | ||
jitterentropy.c | ||
Kconfig | ||
khazad.c | ||
lrw.c | ||
lz4.c | ||
lz4hc.c | ||
lzo.c | ||
Makefile | ||
mcryptd.c | ||
md4.c | ||
md5.c | ||
memneq.c | ||
michael_mic.c | ||
pcbc.c | ||
pcompress.c | ||
pcrypt.c | ||
poly1305_generic.c | ||
proc.c | ||
ripemd.h | ||
rmd128.c | ||
rmd160.c | ||
rmd256.c | ||
rmd320.c | ||
rng.c | ||
rsa_helper.c | ||
rsa.c | ||
rsakey.asn1 | ||
salsa20_generic.c | ||
scatterwalk.c | ||
seed.c | ||
seqiv.c | ||
serpent_generic.c | ||
sha1_generic.c | ||
sha256_generic.c | ||
sha512_generic.c | ||
shash.c | ||
tcrypt.c | ||
tcrypt.h | ||
tea.c | ||
testmgr.c | ||
testmgr.h | ||
tgr192.c | ||
twofish_common.c | ||
twofish_generic.c | ||
vmac.c | ||
wp512.c | ||
xcbc.c | ||
xor.c | ||
xts.c | ||
zlib.c |