crypto: cast6-avx: use new optimized XTS code

Change cast6-avx to use the new XTS code, for smaller stack usage and a small
boost to performance.

tcrypt results, with Intel i5-2450M:
        enc     dec
16B     1.01x   1.01x
64B     1.01x   1.00x
256B    1.09x   1.02x
1024B   1.08x   1.06x
8192B   1.08x   1.07x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Jussi Kivilinna 2013-04-08 21:51:05 +03:00 committed by Herbert Xu
parent 18be45270a
commit 70177286e1
2 changed files with 98 additions and 41 deletions

View File

@ -4,7 +4,7 @@
* Copyright (C) 2012 Johannes Goetzfried * Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
* *
* Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -227,6 +227,8 @@
.data .data
.align 16 .align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lbswap_mask: .Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask: .Lbswap128_mask:
@ -424,3 +426,47 @@ ENTRY(cast6_ctr_8way)
ret; ret;
ENDPROC(cast6_ctr_8way) ENDPROC(cast6_ctr_8way)
/*
 * 8-way XTS encryption entry point: loads the source blocks and tweaks with
 * load_xts_8way, runs __cast6_enc_blk8, and writes the result with
 * store_xts_8way.
 */
ENTRY(cast6_xts_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
/*
 * Keep a copy of dst in %r11; the final store uses %r11 rather than %rsi
 * (presumably %rsi is not preserved across the call below -- confirm).
 */
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
call __cast6_enc_blk8;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_xts_enc_8way)
/*
 * 8-way XTS decryption entry point: loads the source blocks and tweaks with
 * load_xts_8way, runs __cast6_dec_blk8, and writes the result with
 * store_xts_8way.
 */
ENTRY(cast6_xts_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
/*
 * Keep a copy of dst in %r11; the final store uses %r11 rather than %rsi
 * (presumably %rsi is not preserved across the call below -- confirm).
 */
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
call __cast6_dec_blk8;
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
ENDPROC(cast6_xts_dec_8way)

View File

@ -4,6 +4,8 @@
* Copyright (C) 2012 Johannes Goetzfried * Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
* *
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -50,6 +52,23 @@ asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
le128 *iv); le128 *iv);
/*
 * 8-way XTS routines implemented in assembly. Both take the cipher context,
 * destination, source and a pointer to the current tweak (iv).
 */
asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
/*
 * XTS encryption helper used as the num_blocks = 1 entry of cast6_enc_xts:
 * forwards its arguments to glue_xts_crypt_128bit_one() with __cast6_encrypt
 * as the block function.
 */
static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_encrypt));
}
/*
 * XTS decryption helper used as the num_blocks = 1 entry of cast6_dec_xts:
 * forwards its arguments to glue_xts_crypt_128bit_one() with __cast6_decrypt
 * as the block function.
 */
static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_decrypt));
}
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{ {
be128 ctrblk; be128 ctrblk;
@ -87,6 +106,19 @@ static const struct common_glue_ctx cast6_ctr = {
} } } }
}; };
/*
 * XTS encryption function table handed to glue_xts_crypt_128bit() by
 * xts_encrypt(). Two entries: the assembly routine that handles
 * CAST6_PARALLEL_BLOCKS blocks per call, and the one-block C helper.
 */
static const struct common_glue_ctx cast6_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
} }
};
static const struct common_glue_ctx cast6_dec = { static const struct common_glue_ctx cast6_dec = {
.num_funcs = 2, .num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
@ -113,6 +145,19 @@ static const struct common_glue_ctx cast6_dec_cbc = {
} } } }
}; };
/*
 * XTS decryption function table handed to glue_xts_crypt_128bit() by
 * xts_decrypt(). Two entries: the assembly routine that handles
 * CAST6_PARALLEL_BLOCKS blocks per call, and the one-block C helper.
 */
static const struct common_glue_ctx cast6_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes) struct scatterlist *src, unsigned int nbytes)
{ {
@ -307,54 +352,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes) struct scatterlist *src, unsigned int nbytes)
{ {
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAST6_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx, return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), XTS_TWEAK_CAST(__cast6_encrypt),
.crypt_ctx = &crypt_ctx, &ctx->tweak_ctx, &ctx->crypt_ctx);
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
cast6_fpu_end(crypt_ctx.fpu_enabled);
return ret;
} }
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes) struct scatterlist *src, unsigned int nbytes)
{ {
struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[CAST6_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
};
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.tweak_ctx = &ctx->tweak_ctx, return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), XTS_TWEAK_CAST(__cast6_encrypt),
.crypt_ctx = &crypt_ctx, &ctx->tweak_ctx, &ctx->crypt_ctx);
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = xts_crypt(desc, dst, src, nbytes, &req);
cast6_fpu_end(crypt_ctx.fpu_enabled);
return ret;
} }
static struct crypto_alg cast6_algs[10] = { { static struct crypto_alg cast6_algs[10] = { {