crypto: aesni - AVX and AVX2 version of AESNI-GCM encode and decode
We have added AVX and AVX2 routines that optimize AESNI-GCM encode/decode.
These routines are optimized for encrypting and decrypting large buffers.
In tests we have seen up to a 6% speedup for 1K, an 11% speedup for 2K,
and an 18% speedup for 8K buffers over the existing SSE version. These
routines should provide even better speedup for future Intel x86_64 CPUs.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
commit d764593af9
parent fed286110f
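The dispatch in this patch is two-level: at module init the best available implementation family (SSE, AVX, or AVX2) is bound from CPU features, and within the AVX/AVX2 wrappers each request is further dispatched on buffer size, since the SSE code still wins on short inputs. Below is a minimal standalone sketch of the size dispatch shown in the diff; the gcm_enc_* stubs and main() are hypothetical stand-ins for the real asmlinkage routines, not part of the patch:

#include <stdio.h>

/* Thresholds from the patch: below 640 bytes the SSE path wins; from 640
 * up to 4096 bytes the AVX (gen2) path wins; beyond that, AVX2 (gen4). */
#define AVX_GEN2_OPTSIZE 640
#define AVX_GEN4_OPTSIZE 4096

/* Hypothetical stand-ins for the real SSE/AVX/AVX2 asm entry points. */
static void gcm_enc_sse(unsigned long len)  { printf("SSE  path: %lu bytes\n", len); }
static void gcm_enc_gen2(unsigned long len) { printf("AVX  path: %lu bytes\n", len); }
static void gcm_enc_gen4(unsigned long len) { printf("AVX2 path: %lu bytes\n", len); }

/* Mirrors the shape of aesni_gcm_enc_avx2() in the diff below:
 * dispatch purely on the plaintext length. */
static void gcm_enc_dispatch(unsigned long plaintext_len)
{
	if (plaintext_len < AVX_GEN2_OPTSIZE)
		gcm_enc_sse(plaintext_len);
	else if (plaintext_len < AVX_GEN4_OPTSIZE)
		gcm_enc_gen2(plaintext_len);
	else
		gcm_enc_gen4(plaintext_len);
}

int main(void)
{
	gcm_enc_dispatch(512);	/* -> SSE  */
	gcm_enc_dispatch(2048);	/* -> AVX  */
	gcm_enc_dispatch(8192);	/* -> AVX2 */
	return 0;
}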
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -75,7 +75,7 @@ ifeq ($(avx2_supported),yes)
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 endif
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_avx.o aesni-intel_glue.o fpu.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
(File diff suppressed because it is too large.)
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 int crypto_fpu_init(void);
 void crypto_fpu_exit(void);
 
+#define AVX_GEN2_OPTSIZE 640
+#define AVX_GEN4_OPTSIZE 4096
+
 #ifdef CONFIG_X86_64
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
+
+#ifdef CONFIG_AS_AVX
+/*
+ * asmlinkage void aesni_gcm_precomp_avx_gen2()
+ * gcm_data *my_ctx_data, context data
+ * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
+ */
+asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey);
+
+asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out,
+			const u8 *in, unsigned long plaintext_len, u8 *iv,
+			const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len);
+
+asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
+			const u8 *in, unsigned long ciphertext_len, u8 *iv,
+			const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len);
+
+static void aesni_gcm_enc_avx(void *ctx, u8 *out,
+			const u8 *in, unsigned long plaintext_len, u8 *iv,
+			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len)
+{
+	if (plaintext_len < AVX_GEN2_OPTSIZE) {
+		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
+				aad_len, auth_tag, auth_tag_len);
+	} else {
+		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
+		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
+					aad_len, auth_tag, auth_tag_len);
+	}
+}
+
+static void aesni_gcm_dec_avx(void *ctx, u8 *out,
+			const u8 *in, unsigned long ciphertext_len, u8 *iv,
+			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len)
+{
+	if (ciphertext_len < AVX_GEN2_OPTSIZE) {
+		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
+				aad_len, auth_tag, auth_tag_len);
+	} else {
+		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
+		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
+					aad_len, auth_tag, auth_tag_len);
+	}
+}
+#endif
+
+#ifdef CONFIG_AS_AVX2
+/*
+ * asmlinkage void aesni_gcm_precomp_avx_gen4()
+ * gcm_data *my_ctx_data, context data
+ * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
+ */
+asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey);
+
+asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out,
+			const u8 *in, unsigned long plaintext_len, u8 *iv,
+			const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len);
+
+asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
+			const u8 *in, unsigned long ciphertext_len, u8 *iv,
+			const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len);
+
+static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
+			const u8 *in, unsigned long plaintext_len, u8 *iv,
+			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len)
+{
+	if (plaintext_len < AVX_GEN2_OPTSIZE) {
+		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
+				aad_len, auth_tag, auth_tag_len);
+	} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
+		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
+		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
+					aad_len, auth_tag, auth_tag_len);
+	} else {
+		aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
+		aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad,
+					aad_len, auth_tag, auth_tag_len);
+	}
+}
+
+static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
+			const u8 *in, unsigned long ciphertext_len, u8 *iv,
+			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len)
+{
+	if (ciphertext_len < AVX_GEN2_OPTSIZE) {
+		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
+				aad, aad_len, auth_tag, auth_tag_len);
+	} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
+		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
+		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
+					aad_len, auth_tag, auth_tag_len);
+	} else {
+		aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
+		aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad,
+					aad_len, auth_tag, auth_tag_len);
+	}
+}
+#endif
+
+static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
+			const u8 *in, unsigned long plaintext_len, u8 *iv,
+			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len);
+
+static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
+			const u8 *in, unsigned long ciphertext_len, u8 *iv,
+			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+			u8 *auth_tag, unsigned long auth_tag_len);
+
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 {
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
 		dst = src;
 	}
 
-	aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
+	aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
 		ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
 		+ ((unsigned long)req->cryptlen), auth_tag_len);
 
@@ -996,7 +1116,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 		dst = src;
 	}
 
-	aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv,
+	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
 		ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
 		authTag, auth_tag_len);
 
@@ -1353,6 +1473,25 @@ static int __init aesni_init(void)
 
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
+#ifdef CONFIG_AS_AVX2
+	if (boot_cpu_has(X86_FEATURE_AVX2)) {
+		pr_info("AVX2 version of gcm_enc/dec engaged.\n");
+		aesni_gcm_enc_tfm = aesni_gcm_enc_avx2;
+		aesni_gcm_dec_tfm = aesni_gcm_dec_avx2;
+	} else
+#endif
+#ifdef CONFIG_AS_AVX
+	if (boot_cpu_has(X86_FEATURE_AVX)) {
+		pr_info("AVX version of gcm_enc/dec engaged.\n");
+		aesni_gcm_enc_tfm = aesni_gcm_enc_avx;
+		aesni_gcm_dec_tfm = aesni_gcm_dec_avx;
+	} else
+#endif
+	{
+		pr_info("SSE version of gcm_enc/dec engaged.\n");
+		aesni_gcm_enc_tfm = aesni_gcm_enc;
+		aesni_gcm_dec_tfm = aesni_gcm_dec;
+	}
 
 	err = crypto_fpu_init();
 	if (err)
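The aesni_init() hunk above binds aesni_gcm_enc_tfm/aesni_gcm_dec_tfm once at module load from CPU feature flags, so each later request goes through a plain function pointer with no per-call feature test. A minimal standalone sketch of that one-time binding; the has_avx()/has_avx2() probes are hypothetical stand-ins for boot_cpu_has(), hardwired so the sketch runs anywhere:

#include <stdbool.h>
#include <stdio.h>

typedef void (*gcm_fn)(unsigned long len);

static void gcm_enc_sse(unsigned long len)  { printf("SSE enc:  %lu bytes\n", len); }
static void gcm_enc_avx(unsigned long len)  { printf("AVX enc:  %lu bytes\n", len); }
static void gcm_enc_avx2(unsigned long len) { printf("AVX2 enc: %lu bytes\n", len); }

/* Hypothetical stand-ins for boot_cpu_has(X86_FEATURE_AVX2/AVX). */
static bool has_avx2(void) { return false; }
static bool has_avx(void)  { return true; }

/* Bound once at init, then used by every request. */
static gcm_fn aesni_gcm_enc_tfm;

static void init_sketch(void)
{
	if (has_avx2())
		aesni_gcm_enc_tfm = gcm_enc_avx2;
	else if (has_avx())
		aesni_gcm_enc_tfm = gcm_enc_avx;
	else
		aesni_gcm_enc_tfm = gcm_enc_sse;
}

int main(void)
{
	init_sketch();
	aesni_gcm_enc_tfm(8192);	/* no feature test on the hot path */
	return 0;
}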