linux/arch/powerpc/crypto/aes-spe-modes.S

631 lines
15 KiB
ArmAsm
Raw Normal View History

/*
* AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"
/*
 * Endian dependent accessors for the source data pointer (rSP), the
 * destination data pointer (rDP) and the IV pointer (rIP).
 *
 * Big endian: words are accessed with a plain displacement ("off") and
 * the pointers are left untouched. NEXT_BLOCK advances rSP/rDP by 16
 * once per block and START_IV has nothing to undo.
 *
 * Little endian: words are accessed byte-reversed (lwbrx/stwbrx), the
 * "off" argument is ignored and the pointer is advanced by 4 after each
 * access. After four accesses the pointer already sits 16 bytes further,
 * so NEXT_BLOCK is empty and START_IV must rewind rIP by 16 before the
 * IV can be accessed again from its start.
 *
 * CBC_DEC / CTR_DEC are the pointer corrections used by the CBC
 * decryption loop and the CTR partial block code. The little endian
 * values are 16 larger because they also have to take back the
 * increments done by the four preceding word accesses.
 */
#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
#define LOAD_DATA(reg, off) \
lwz reg,off(rSP); /* load with offset */
#define SAVE_DATA(reg, off) \
stw reg,off(rDP); /* save with offset */
#define NEXT_BLOCK \
addi rSP,rSP,16; /* increment pointers per block */ \
addi rDP,rDP,16;
#define LOAD_IV(reg, off) \
lwz reg,off(rIP); /* IV loading with offset */
#define SAVE_IV(reg, off) \
stw reg,off(rIP); /* IV saving with offset */
#define START_IV /* nothing to reset */
#define CBC_DEC 16 /* CBC decrement per block */
#define CTR_DEC 1 /* CTR decrement one byte (for the lbzu pre-increment) */
#else /* Macros for little endian */
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rSP; /* load reversed */ \
addi rSP,rSP,4; /* and increment pointer */
#define SAVE_DATA(reg, off) \
stwbrx reg,0,rDP; /* save reversed */ \
addi rDP,rDP,4; /* and increment pointer */
#define NEXT_BLOCK /* nothing to do, pointers already advanced */
#define LOAD_IV(reg, off) \
lwbrx reg,0,rIP; /* load reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define SAVE_IV(reg, off) \
stwbrx reg,0,rIP; /* save reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define START_IV \
subi rIP,rIP,16; /* must reset pointer */
#define CBC_DEC 32 /* 2 blocks because of incs */
#define CTR_DEC 17 /* 1 block because of incs, plus one byte for lbzu */
#endif
/*
 * Save/restore of additional 32 bit registers in the stack frame.
 * INITIALIZE_CRYPT/FINALIZE_CRYPT select one of these variants by
 * pasting their "nr32bitregs" argument (0, 4 or 8) into the macro name.
 *
 *   0 registers: nothing
 *   4 registers: rI0-rI3 at offsets  96-108 of the frame
 *   8 registers: the above plus rG0-rG3 at offsets 112-124
 */
#define SAVE_0_REGS /* nothing to save */
#define LOAD_0_REGS /* nothing to restore */
#define SAVE_4_REGS \
stw rI0,96(r1); /* save 32 bit registers */ \
stw rI1,100(r1); \
stw rI2,104(r1); \
stw rI3,108(r1);
#define LOAD_4_REGS \
lwz rI0,96(r1); /* restore 32 bit registers */ \
lwz rI1,100(r1); \
lwz rI2,104(r1); \
lwz rI3,108(r1);
#define SAVE_8_REGS \
SAVE_4_REGS \
stw rG0,112(r1); /* save 32 bit registers */ \
stw rG1,116(r1); \
stw rG2,120(r1); \
stw rG3,124(r1);
#define LOAD_8_REGS \
LOAD_4_REGS \
lwz rG0,112(r1); /* restore 32 bit registers */ \
lwz rG1,116(r1); \
lwz rG2,120(r1); \
lwz rG3,124(r1);
/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
 *
 *   tab         - symbol of the lookup table; its absolute address is
 *                 built with lis/ori and left in rT0
 *   nr32bitregs - 0, 4 or 8; selects SAVE_<n>_REGS for the extra
 *                 32 bit registers the function is going to use
 *
 * Creates a 160 byte stack frame, stores the link register at 8(r1) of
 * that frame, keeps a copy of the key pointer rKP in rKS and saves
 * r14-r23 as full 64 bit values (evstdw) at offsets 16-88. r0 is used
 * as scratch. The stores are interleaved with the other setup
 * instructions on purpose; do not reorder without need.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
mflr r0; \
stwu r1,-160(r1); /* create stack frame */ \
lis rT0,tab@h; /* en-/decryption table pointer */ \
stw r0,8(r1); /* save link register */ \
ori rT0,rT0,tab@l; \
evstdw r14,16(r1); \
mr rKS,rKP; /* remember start of key for later blocks */ \
evstdw r15,24(r1); /* We must save non volatile */ \
evstdw r16,32(r1); /* registers. Take the chance */ \
evstdw r17,40(r1); /* and save the SPE part too */ \
evstdw r18,48(r1); \
evstdw r19,56(r1); \
evstdw r20,64(r1); \
evstdw r21,72(r1); \
evstdw r22,80(r1); \
evstdw r23,88(r1); \
SAVE_##nr32bitregs##_REGS
/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
 *
 * Counterpart of INITIALIZE_CRYPT; must be used with the same
 * nr32bitregs value. Restores the link register, r14-r23 (64 bit,
 * evldw) and the extra 32 bit registers, then overwrites the first
 * word of each of the ten 8 byte save slots (offsets 16-88) with zero
 * before the 160 byte frame is released. r0 is zero afterwards.
 *
 * NOTE(review): only the first word of each evstdw slot is wiped,
 * presumably because that is the SPE-only upper register half that may
 * carry data of another user of this code - confirm against the SPE
 * store layout. The slots at 96-124 are not wiped.
 *
 * The caller still has to issue the final blr.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
lwz r0,8(r1); \
evldw r14,16(r1); /* restore SPE registers */ \
evldw r15,24(r1); \
evldw r16,32(r1); \
evldw r17,40(r1); \
evldw r18,48(r1); \
evldw r19,56(r1); \
evldw r20,64(r1); \
evldw r21,72(r1); \
evldw r22,80(r1); \
evldw r23,88(r1); \
LOAD_##nr32bitregs##_REGS \
mtlr r0; /* restore link register */ \
xor r0,r0,r0; /* r0 = 0, used for wiping below */ \
stw r0,16(r1); /* delete sensitive data */ \
stw r0,24(r1); /* that we might have pushed */ \
stw r0,32(r1); /* from other context that runs */ \
stw r0,40(r1); /* the same code */ \
stw r0,48(r1); \
stw r0,56(r1); \
stw r0,64(r1); \
stw r0,72(r1); \
stw r0,80(r1); \
stw r0,88(r1); \
addi r1,r1,160; /* cleanup stack frame */
/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit registers
 *
 *   t0 = byteswap(s0), t1 = byteswap(s1)
 *
 * For source bytes A B C D the rotate right by 8 gives D A B C, the
 * two inserts then take bytes 1 and 3 from the source rotated left
 * by 8 (B C D A), resulting in D C B A.
 *
 * The target registers must differ from the source registers because
 * each source is read again after its target has been written.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
rotrwi t1,s1,8; \
rlwimi t0,s0,8,8,15; \
rlwimi t1,s1,8,8,15; \
rlwimi t0,s0,8,24,31; \
rlwimi t1,s1,8,24,31;
/*
 * GF128_MUL(d0, d1, d2, d3, t0) - multiply a 128 bit value by x
 *
 * The value is held in four 32 bit registers with d0 as the least and
 * d3 as the most significant word. It is shifted left by one bit; if
 * the top bit of d3 was set, 0x87 is xored into d0 afterwards.
 *
 *   t0 - scratch, receives the reduction constant (0x87 or 0)
 *
 * Each step copies the top bit of the next lower word into the top bit
 * of the current word and rotates left by one, which moves that bit
 * into the bottom position. The top bit of d0 must be fetched (rlwimi
 * into d1) before d0 itself is shifted, so keep the statement order.
 * Clobbers cr0 through the cmpwi.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
li t0,0x87; /* multiplication in GF128 */ \
cmpwi d3,-1; /* gt <=> top bit of d3 is clear */ \
iselgt t0,0,t0; /* no carry out: nothing to reduce */ \
rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
rotlwi d3,d3,1; \
rlwimi d2,d1,0,0,0; \
rotlwi d2,d2,1; \
rlwimi d1,d0,0,0,0; \
slwi d0,d0,1; /* shift left 128 bit */ \
rotlwi d1,d1,1; \
xor d0,d0,t0;
/*
 * START_KEY(d0, d1, d2, d3) - set up the state for one block
 *
 * Loads the first four key words from rKP into rW0-rW3, loads the
 * count register from rRR and xors the four input words d0-d3 with the
 * key words into rD0-rD3. The inputs may be rD0-rD3 themselves.
 * Used directly in front of every call to ppc_encrypt_block or
 * ppc_decrypt_block in this file.
 */
#define START_KEY(d0, d1, d2, d3) \
lwz rW0,0(rKP); \
mtctr rRR; /* loop counter for the block routine */ \
lwz rW1,4(rKP); \
lwz rW2,8(rKP); \
lwz rW3,12(rKP); \
xor rD0,d0,rW0; \
xor rD1,d1,rW1; \
xor rD2,d2,rW2; \
xor rD3,d3,rW3;
/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds)
 *
 * called from glue layer to encrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The block is read through rSP and written through rDP. The register
 * aliases come from aes-spe-regs.h; presumably they map to the
 * arguments in the order given above - verify there.
 * ppc_encrypt_block is defined outside this file. After it returns the
 * result words are xored with rW0-rW3 once more, so the routine is
 * expected to leave the last key words there - confirm against its
 * definition.
 */
_GLOBAL(ppc_encrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
LOAD_DATA(rD0, 0) /* fetch the 16 byte input block */
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0 /* final key xor, interleaved with the stores */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds)
 *
 * called from glue layer to decrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * Same flow as ppc_encrypt_aes, but with the decryption table and
 * ppc_decrypt_block (defined outside this file). In addition rT1 is
 * set to rT0 + 4096 before the call; presumably ppc_decrypt_block
 * needs this second table pointer - confirm against its definition.
 */
_GLOBAL(ppc_decrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
LOAD_DATA(rD0, 0) /* fetch the 16 byte input block */
addi rT1,rT0,4096 /* second table pointer, 4 KiB above rT0 */
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0 /* final key xor, interleaved with the stores */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to encrypt multiple blocks via ECB
 * Bytes must be greater than or equal to 16 and only whole blocks
 * are processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * The loop body runs at least once (the length is tested at the end).
 * rLN holds the bytes still to be processed; the loop continues while
 * more than 15 bytes remain after the current block.
 *
 * The compare result is produced before the call to ppc_encrypt_block
 * and consumed by the branch after it, so that routine (defined
 * outside this file) must leave cr0 untouched.
 */
_GLOBAL(ppc_encrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* rewind key pointer to the saved start */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16 /* account for the current block */
LOAD_DATA(rD2, 8)
cmpwi rLN,15 /* another full block left? used by bt below */
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_ecb_loop
FINALIZE_CRYPT(0)
blr
/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to decrypt multiple blocks via ECB
 * Bytes must be greater than or equal to 16 and only whole blocks
 * are processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * Mirror of ppc_encrypt_ecb using the decryption table and
 * ppc_decrypt_block. rT1 = rT0 + 4096 is set up once in front of the
 * loop. The loop body runs at least once; the compare result set before
 * the call is consumed by the branch after it, so ppc_decrypt_block
 * (defined outside this file) must leave cr0 untouched.
 */
_GLOBAL(ppc_decrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
addi rT1,rT0,4096 /* second table pointer, 4 KiB above rT0 */
ppc_decrypt_ecb_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* rewind key pointer to the saved start */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16 /* account for the current block */
LOAD_DATA(rD2, 8)
cmpwi rLN,15 /* another full block left? used by bt below */
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_decrypt_ecb_loop
FINALIZE_CRYPT(0)
blr
/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt multiple blocks via CBC
 * Bytes must be greater than or equal to 16 and only whole blocks
 * are processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * rI0-rI3 carry the chaining value: they start out as the IV, are
 * xored into each input block and are then replaced by the produced
 * output block. After the last block they are written back through
 * rIP, so the IV buffer is updated for a follow-up call.
 *
 * The compare result set before the call to ppc_encrypt_block is
 * consumed by the branch after it, so that routine (defined outside
 * this file) must leave cr0 untouched.
 */
_GLOBAL(ppc_encrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* rewind key pointer to the saved start */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16 /* account for the current block */
LOAD_DATA(rD2, 8)
cmpwi rLN,15 /* another full block left? used by bt below */
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0 /* chain: input ^ previous output (or IV) */
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rI0,rD0,rW0 /* output block becomes next chaining value */
SAVE_DATA(rI0, 0)
xor rI1,rD1,rW1
SAVE_DATA(rI1, 4)
xor rI2,rD2,rW2
SAVE_DATA(rI2, 8)
xor rI3,rD3,rW3
SAVE_DATA(rI3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_cbc_loop
START_IV /* little endian: rewind rIP after LOAD_IV */
SAVE_IV(rI0, 0) /* hand the last output block back as IV */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to decrypt multiple blocks via CBC
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The length is rounded down to a multiple of 16 and the data is
 * processed from the last block towards the first one. Each input
 * block is loaded before the output of the following block is stored.
 *
 *  - The initial IV is kept in rI0-rI3 for the very first block.
 *  - The last input block is loaded and immediately stored through rIP
 *    as the IV for a follow-up call.
 *  - Loop: decrypt the current block, step rSP back by CBC_DEC to load
 *    the preceding input block, xor it into the result and store it;
 *    then step rDP back by CBC_DEC as well.
 *  - End: the first block is decrypted and xored with the initial IV.
 *
 * rT1 is used as a scratch mask first and becomes the second table
 * pointer (rT0 + 4096) afterwards.
 *
 * NOTE(review): there is no check for bytes < 16. In that case rLN
 * becomes -16 and rSP/rDP are moved in front of the buffers, so the
 * caller has to guarantee at least one full block.
 */
_GLOBAL(ppc_decrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
li rT1,15
LOAD_IV(rI0, 0)
andc rLN,rLN,rT1 /* round length down to whole blocks */
LOAD_IV(rI1, 4)
subi rLN,rLN,16 /* rLN = offset of the last block */
LOAD_IV(rI2, 8)
add rSP,rSP,rLN /* reverse processing */
LOAD_IV(rI3, 12)
add rDP,rDP,rLN
LOAD_DATA(rD0, 0) /* last input block */
addi rT1,rT0,4096 /* second table pointer, 4 KiB above rT0 */
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_IV /* little endian: rewind rIP after LOAD_IV */
SAVE_IV(rD0, 0) /* last input block is the next IV */
SAVE_IV(rD1, 4)
SAVE_IV(rD2, 8)
cmpwi rLN,16 /* less than two blocks in total? */
SAVE_IV(rD3, 12)
bt lt,ppc_decrypt_cbc_end
ppc_decrypt_cbc_loop:
mr rKP,rKS /* rewind key pointer to the saved start */
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
subi rLN,rLN,16
subi rSP,rSP,CBC_DEC /* step back to the preceding input block */
xor rW0,rD0,rW0 /* finish decryption of the current block */
LOAD_DATA(rD0, 0) /* preceding input block, decrypted next */
xor rW1,rD1,rW1
LOAD_DATA(rD1, 4)
xor rW2,rD2,rW2
LOAD_DATA(rD2, 8)
xor rW3,rD3,rW3
LOAD_DATA(rD3, 12)
xor rW0,rW0,rD0 /* unchain with the preceding input block */
SAVE_DATA(rW0, 0)
xor rW1,rW1,rD1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rD2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rD3
SAVE_DATA(rW3, 12)
cmpwi rLN,15 /* more than the first block left? */
subi rDP,rDP,CBC_DEC /* step back to the preceding output block */
bt gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
mr rKP,rKS /* rewind key pointer to the saved start */
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rW0,rW0,rD0 /* finish decryption of the first block */
xor rW1,rW1,rD1
xor rW2,rW2,rD2
xor rW3,rW3,rD3
xor rW0,rW0,rI0 /* decrypt with initial IV */
SAVE_DATA(rW0, 0)
xor rW1,rW1,rI1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rI2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rI3
SAVE_DATA(rW3, 12)
FINALIZE_CRYPT(4)
blr
/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *		 u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * rI0-rI3 hold the 128 bit counter with rI3 as the least significant
 * word; it is incremented by one per block with an addic/addze carry
 * chain and written back through rIP at the end.
 *
 * Full blocks: the counter is encrypted and the result xored with the
 * input block.
 *
 * Partial block (1-15 bytes left): the counter is encrypted once more
 * and the result is parked in the IV buffer, which serves as scratch
 * space. The remaining bytes are then xored one by one with pre-
 * incrementing byte accesses. Afterwards rIP is moved back to the
 * start of the IV buffer and the counter is incremented a last time.
 * A length of 0 skips straight to the IV write back.
 */
_GLOBAL(ppc_crypt_ctr)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rLN,16 /* at least one full block? */
LOAD_IV(rI3, 12)
START_IV /* little endian: rewind rIP after LOAD_IV */
bt lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
mr rKP,rKS /* rewind key pointer to the saved start */
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rW0,rD0,rW0 /* rW0-rW3 = encrypted counter */
xor rW1,rD1,rW1
xor rW2,rD2,rW2
xor rW3,rD3,rW3
LOAD_DATA(rD0, 0)
subi rLN,rLN,16 /* account for the current block */
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rW0 /* output = input ^ encrypted counter */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
addic rI3,rI3,1 /* increase counter */
addze rI2,rI2 /* ripple the carry into the upper words */
addze rI1,rI1
addze rI0,rI0
NEXT_BLOCK
cmpwi rLN,15 /* another full block left? */
bt gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
cmpwi rLN,0
bt eq,ppc_crypt_ctr_end /* no tail bytes */
mr rKP,rKS /* rewind key pointer to the saved start */
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rW0,rD0,rW0
SAVE_IV(rW0, 0) /* park encrypted counter in the IV buffer */
xor rW1,rD1,rW1
SAVE_IV(rW1, 4)
xor rW2,rD2,rW2
SAVE_IV(rW2, 8)
xor rW3,rD3,rW3
SAVE_IV(rW3, 12)
mtctr rLN /* one iteration per remaining byte */
subi rIP,rIP,CTR_DEC /* one byte in front of the IV buffer */
subi rSP,rSP,1 /* bias pointers for the pre-increment */
subi rDP,rDP,1
ppc_crypt_ctr_xorbyte:
lbzu rW4,1(rIP) /* bytewise xor for partial block */
lbzu rW5,1(rSP)
xor rW4,rW4,rW5
stbu rW4,1(rDP)
bdnz ppc_crypt_ctr_xorbyte
subf rIP,rLN,rIP /* take back the rLN byte steps ... */
addi rIP,rIP,1 /* ... and the bias: rIP = start of IV */
addic rI3,rI3,1 /* increase counter for the partial block */
addze rI2,rI2
addze rI1,rI1
addze rI0,rI0
ppc_crypt_ctr_end:
SAVE_IV(rI0, 0) /* write the updated counter back */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to encrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * The tweak is kept twice: rI0-rI3 in the byte order used for the data
 * xor and rG0-rG3 byte swapped for the GF(2^128) multiplication. After
 * each block rG0-rG3 are multiplied by x and swapped back into
 * rI0-rI3. The final tweak is written back through rIP.
 *
 * If rKT is non-zero the IV is first encrypted with the key it points
 * to (same round count in rRR as for the data key). The loop body runs
 * at least once and repeats while rLN is greater than zero after
 * subtracting 16.
 */
_GLOBAL(ppc_encrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rKT,0 /* tweak key given? */
LOAD_IV(rI3, 12)
bt eq,ppc_encrypt_xts_notweak
mr rKP,rKT /* use the tweak key for the IV encryption */
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0 /* rI0-rI3 = encrypted IV = first tweak */
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
ppc_encrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* multiplication copy of the tweak */
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* back to the data key */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16 /* account for the current block */
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0 /* apply tweak before encryption */
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
xor rD0,rD0,rI0 /* apply tweak after encryption */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* next tweak; rW0 is scratch */
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
cmpwi rLN,0 /* after GF128_MUL, which clobbers cr0 */
NEXT_BLOCK
bt gt,ppc_encrypt_xts_loop
START_IV /* little endian: rewind rIP after LOAD_IV */
SAVE_IV(rI0, 0) /* hand the next tweak back to the caller */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr
/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to decrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * The length is counted in bytes like in ppc_encrypt_xts: rLN is
 * reduced by 16 per block (an earlier version of this comment named
 * the parameter "blocks").
 *
 * Same structure as ppc_encrypt_xts, with ppc_decrypt_block for the
 * data. The IV is still *encrypted* with the tweak key, therefore rT0
 * is lowered by 4096 around the call to ppc_encrypt_block and raised
 * again afterwards; presumably the encryption table is located 4 KiB
 * in front of PPC_AES_4K_DECTAB - confirm against the table layout.
 * rT1 = decryption table + 4096 is set once at the start.
 */
_GLOBAL(ppc_decrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
LOAD_IV(rI0, 0)
addi rT1,rT0,4096 /* second table pointer, 4 KiB above rT0 */
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
cmpwi rKT,0 /* tweak key given? */
LOAD_IV(rI3, 12)
bt eq,ppc_decrypt_xts_notweak
subi rT0,rT0,4096 /* temporary table pointer for encryption */
mr rKP,rKT /* use the tweak key for the IV encryption */
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0 /* rI0-rI3 = encrypted IV = first tweak */
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
addi rT0,rT0,4096 /* back to the decryption table */
ppc_decrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* multiplication copy of the tweak */
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
LOAD_DATA(rD0, 0)
mr rKP,rKS /* back to the data key */
LOAD_DATA(rD1, 4)
subi rLN,rLN,16 /* account for the current block */
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rI0 /* apply tweak before decryption */
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
xor rD0,rD0,rI0 /* apply tweak after decryption */
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* next tweak; rW0 is scratch */
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
cmpwi rLN,0 /* after GF128_MUL, which clobbers cr0 */
NEXT_BLOCK
bt gt,ppc_decrypt_xts_loop
START_IV /* little endian: rewind rIP after LOAD_IV */
SAVE_IV(rI0, 0) /* hand the next tweak back to the caller */
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr