target-ppc: Altivec 2.07: AES Instructions

This patch adds the Vector AES instructions introduced in Power ISA
Version 2.07:

   - Vector AES Cipher (vcipher)
   - Vector AES Cipher Last (vcipherlast)
   - Vector AES Inverse Cipher (vncipher)
   - Vector AES Inverse Cipher Last (vncipherlast)
   - Vector AES SubBytes (vsbox)
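
As an illustration only (not part of this patch), guest code could reach
these instructions through the compiler's POWER8 crypto builtins; the
builtin names and vector type below are assumptions about GCC's -mcrypto
support, not something this change defines:

    /* Hypothetical guest-side AES-128 encryption of one block using
     * pre-expanded round keys rk[0..10]. */
    typedef __vector unsigned long long v2du;

    static v2du aes128_encrypt_block(v2du state, const v2du rk[11])
    {
        int i;

        state ^= rk[0];                             /* initial AddRoundKey */
        for (i = 1; i < 10; i++) {
            state = __builtin_crypto_vcipher(state, rk[i]);
        }
        return __builtin_crypto_vcipherlast(state, rk[10]);
    }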

Note that the implementation of vncipher deviates from the RTL given in
ISA V2.07.  However, it does match the verbal description in the third
paragraph of the instruction's definition.  The RTL will be fixed in
ISA V2.07B.  The implementation here has been tested against actual P8
hardware.
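
For reference, a minimal sketch of the round structure the helpers below
implement (illustrative pseudocode only, using the standard FIPS-197
transformation names rather than anything defined by this patch):

    /* vcipher / vcipherlast: one encryption round, followed by
     * AddRoundKey with VRB. */
    state = ShiftRows(SubBytes(state));
    state = MixColumns(state);            /* omitted by vcipherlast */
    state ^= round_key;

    /* vncipher / vncipherlast: the deviation noted above means the
     * round key is XORed before InvMixColumns, matching the ordering
     * of the straightforward FIPS-197 inverse cipher. */
    state = InvSubBytes(InvShiftRows(state));
    state ^= round_key;
    state = InvMixColumns(state);         /* omitted by vncipherlast */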

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Author:    Tom Musta <tommusta@gmail.com>, 2014-02-12 15:23:17 -06:00
Committer: Alexander Graf <agraf@suse.de>
Commit:    557d52fa69 (parent e8f7b27b99)
3 changed files with 315 additions and 0 deletions

target-ppc/helper.h

@@ -316,6 +316,12 @@ DEF_HELPER_3(vpmsumh, void, avr, avr, avr)
DEF_HELPER_3(vpmsumw, void, avr, avr, avr)
DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
DEF_HELPER_2(vsbox, void, avr, avr)
DEF_HELPER_3(vcipher, void, avr, avr, avr)
DEF_HELPER_3(vcipherlast, void, avr, avr, avr)
DEF_HELPER_3(vncipher, void, avr, avr, avr)
DEF_HELPER_3(vncipherlast, void, avr, avr, avr)
DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32)
DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32)
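
For orientation: QEMU's helper-generation machinery expands each
DEF_HELPER_n entry above into a C prototype roughly like the following;
the exact expansion is an assumption here, but it matches the helper
definitions added in int_helper.c below.

    void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a);
    void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b);
    void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b);
    void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b);
    void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b);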

target-ppc/int_helper.c

@@ -2338,6 +2338,286 @@ uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
    return helper_bcdadd(r, a, &bcopy, ps);
}

/* AES SubBytes lookup table (the FIPS-197 S-box) */
static uint8_t SBOX[256] = {
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
};

static void SubBytes(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = SBOX[a->u8[i]];
    }
}

/* AES InvSubBytes lookup table (the inverse S-box) */
static uint8_t InvSBOX[256] = {
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
};

static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = InvSBOX[a->u8[i]];
    }
}

/* Rotate an 8-bit value left by n (n = 1..7). */
static uint8_t ROTL8(uint8_t x, int n)
{
    return (x << n) | (x >> (8-n));
}

/* Bit n of x, counting from the most significant bit (BIT8(x, 0) is 0x80). */
static inline int BIT8(uint8_t x, int n)
{
    return (x & (0x80 >> n)) != 0;
}

/*
 * Multiplication by small constants in GF(2^8), reduced modulo the AES
 * polynomial x^8 + x^4 + x^3 + x + 1.  0x02 and 0x03 are the MixColumns
 * coefficients; 0x09, 0x0B, 0x0D and 0x0E are the InvMixColumns
 * coefficients.
 */
static uint8_t GFx02(uint8_t x)
{
    return ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
}

static uint8_t GFx03(uint8_t x)
{
    return x ^ ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
}

static uint8_t GFx09(uint8_t x)
{
    uint8_t term2 = ROTL8(x, 3);
    uint8_t term3 = (BIT8(x, 0) ? 0x68 : 0) | (BIT8(x, 1) ? 0x14 : 0) |
                    (BIT8(x, 2) ? 0x02 : 0);
    uint8_t term4 = (BIT8(x, 1) ? 0x20 : 0) | (BIT8(x, 2) ? 0x18 : 0);
    return x ^ term2 ^ term3 ^ term4;
}

static uint8_t GFx0B(uint8_t x)
{
    uint8_t term2 = ROTL8(x, 1);
    uint8_t term3 = (x << 3) | (BIT8(x, 0) ? 0x06 : 0) |
                    (BIT8(x, 2) ? 0x01 : 0);
    uint8_t term4 = (BIT8(x, 0) ? 0x70 : 0) | (BIT8(x, 1) ? 0x06 : 0) |
                    (BIT8(x, 2) ? 0x08 : 0);
    uint8_t term5 = (BIT8(x, 1) ? 0x30 : 0) | (BIT8(x, 2) ? 0x02 : 0);
    uint8_t term6 = BIT8(x, 2) ? 0x10 : 0;
    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
}

static uint8_t GFx0D(uint8_t x)
{
    uint8_t term2 = ROTL8(x, 2);
    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
                    (BIT8(x, 2) ? 0x03 : 0);
    uint8_t term4 = (BIT8(x, 0) ? 0x58 : 0) | (BIT8(x, 1) ? 0x20 : 0);
    uint8_t term5 = (BIT8(x, 1) ? 0x08 : 0) | (BIT8(x, 2) ? 0x10 : 0);
    uint8_t term6 = BIT8(x, 2) ? 0x08 : 0;
    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
}

static uint8_t GFx0E(uint8_t x)
{
    uint8_t term1 = ROTL8(x, 1);
    uint8_t term2 = (x << 2) | (BIT8(x, 2) ? 0x02 : 0) |
                    (BIT8(x, 1) ? 0x01 : 0);
    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
                    (BIT8(x, 2) ? 0x01 : 0);
    uint8_t term4 = (BIT8(x, 0) ? 0x40 : 0) | (BIT8(x, 1) ? 0x28 : 0) |
                    (BIT8(x, 2) ? 0x10 : 0);
    uint8_t term5 = (BIT8(x, 2) ? 0x08 : 0);
    return term1 ^ term2 ^ term3 ^ term4 ^ term5;
}

/*
 * MCB selects byte b (0..3) of AES state column i (0..3) held in an AVR,
 * independent of the host byte order of ppc_avr_t.u8[].
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)])
#else
#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))])
#endif

/* FIPS-197 MixColumns applied to all four state columns. */
static void MixColumns(ppc_avr_t *r, ppc_avr_t *x)
{
    int i;
    for (i = 0; i < 4; i++) {
        MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^
                       MCB(x, i, 2) ^ MCB(x, i, 3);
        MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^
                       GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3);
        MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^
                       GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3));
        MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^
                       MCB(x, i, 2) ^ GFx02(MCB(x, i, 3));
    }
}

/* FIPS-197 InvMixColumns applied to all four state columns. */
static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x)
{
    int i;
    for (i = 0; i < 4; i++) {
        MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^
                       GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3));
        MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^
                       GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3));
        MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^
                       GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3));
        MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^
                       GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3));
    }
}

/* FIPS-197 ShiftRows: state row b is rotated left by b column positions. */
static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x)
{
    MCB(r, 0, 0) = MCB(x, 0, 0);
    MCB(r, 1, 0) = MCB(x, 1, 0);
    MCB(r, 2, 0) = MCB(x, 2, 0);
    MCB(r, 3, 0) = MCB(x, 3, 0);

    MCB(r, 0, 1) = MCB(x, 1, 1);
    MCB(r, 1, 1) = MCB(x, 2, 1);
    MCB(r, 2, 1) = MCB(x, 3, 1);
    MCB(r, 3, 1) = MCB(x, 0, 1);

    MCB(r, 0, 2) = MCB(x, 2, 2);
    MCB(r, 1, 2) = MCB(x, 3, 2);
    MCB(r, 2, 2) = MCB(x, 0, 2);
    MCB(r, 3, 2) = MCB(x, 1, 2);

    MCB(r, 0, 3) = MCB(x, 3, 3);
    MCB(r, 1, 3) = MCB(x, 0, 3);
    MCB(r, 2, 3) = MCB(x, 1, 3);
    MCB(r, 3, 3) = MCB(x, 2, 3);
}

/* FIPS-197 InvShiftRows: state row b is rotated right by b column positions. */
static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x)
{
    MCB(r, 0, 0) = MCB(x, 0, 0);
    MCB(r, 1, 0) = MCB(x, 1, 0);
    MCB(r, 2, 0) = MCB(x, 2, 0);
    MCB(r, 3, 0) = MCB(x, 3, 0);

    MCB(r, 0, 1) = MCB(x, 3, 1);
    MCB(r, 1, 1) = MCB(x, 0, 1);
    MCB(r, 2, 1) = MCB(x, 1, 1);
    MCB(r, 3, 1) = MCB(x, 2, 1);

    MCB(r, 0, 2) = MCB(x, 2, 2);
    MCB(r, 1, 2) = MCB(x, 3, 2);
    MCB(r, 2, 2) = MCB(x, 0, 2);
    MCB(r, 3, 2) = MCB(x, 1, 2);

    MCB(r, 0, 3) = MCB(x, 1, 3);
    MCB(r, 1, 3) = MCB(x, 2, 3);
    MCB(r, 2, 3) = MCB(x, 3, 3);
    MCB(r, 3, 3) = MCB(x, 0, 3);
}

#undef MCB

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    SubBytes(r, a);
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* One full AES encryption round: SubBytes, ShiftRows, MixColumns,
     * then AddRoundKey with VRB. */
    ppc_avr_t vtemp1, vtemp2, vtemp3;

    SubBytes(&vtemp1, a);
    ShiftRows(&vtemp2, &vtemp1);
    MixColumns(&vtemp3, &vtemp2);
    r->u64[0] = vtemp3.u64[0] ^ b->u64[0];
    r->u64[1] = vtemp3.u64[1] ^ b->u64[1];
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* Final AES encryption round: as above but without MixColumns. */
    ppc_avr_t vtemp1, vtemp2;

    SubBytes(&vtemp1, a);
    ShiftRows(&vtemp2, &vtemp1);
    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    ppc_avr_t vtemp1, vtemp2, vtemp3;

    InvShiftRows(&vtemp1, a);
    InvSubBytes(&vtemp2, &vtemp1);
    vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0];
    vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1];
    InvMixColumns(r, &vtemp3);
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* Final AES decryption round: no InvMixColumns. */
    ppc_avr_t vtemp1, vtemp2;

    InvShiftRows(&vtemp1, a);
    InvSubBytes(&vtemp2, &vtemp1);
    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
}

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX
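
As an aside (not part of the patch), the GF(2^8) helpers above can be
spot-checked against the xtime example in FIPS-197 section 4.2.1, where
repeated multiplication of 0x57 by 0x02 yields 0xAE, 0x47, 0x8E and 0x07;
a small standalone harness along these lines would do it (hypothetical
test code, assuming the static helpers are made visible to it):

    #include <assert.h>

    static void test_gf_helpers(void)
    {
        /* xtime chain from FIPS-197: {57}*{02} = {ae}, {ae}*{02} = {47}, ... */
        assert(GFx02(0x57) == 0xAE);
        assert(GFx02(0xAE) == 0x47);
        assert(GFx02(0x47) == 0x8E);
        assert(GFx02(0x8E) == 0x07);
        /* GFx03(x) is x ^ GFx02(x) by construction. */
        assert(GFx03(0x57) == (0x57 ^ 0xAE));
    }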

target-ppc/translate.c

@@ -7414,6 +7414,30 @@ GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)

static void gen_vsbox(DisasContext *ctx)
{
    TCGv_ptr ra, rd;
    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }
    ra = gen_avr_ptr(rA(ctx->opcode));
    rd = gen_avr_ptr(rD(ctx->opcode));
    gen_helper_vsbox(rd, ra);
    tcg_temp_free_ptr(ra);
    tcg_temp_free_ptr(rd);
}

GEN_VXFORM(vcipher, 4, 20)
GEN_VXFORM(vcipherlast, 4, 20)
GEN_VXFORM(vncipher, 4, 21)
GEN_VXFORM(vncipherlast, 4, 21)
GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207,
                vcipherlast, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207,
                vncipherlast, PPC_NONE, PPC2_ALTIVEC_207)

/*** VSX extension ***/

static inline TCGv_i64 cpu_vsrh(int n)

@@ -10669,6 +10693,11 @@ GEN_VXFORM_207(vpmsumh, 4, 17),
GEN_VXFORM_207(vpmsumw, 4, 18),
GEN_VXFORM_207(vpmsumd, 4, 19),
GEN_VXFORM_207(vsbox, 4, 23),
GEN_VXFORM_DUAL(vcipher, vcipherlast, 4, 20, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM_DUAL(vncipher, vncipherlast, 4, 21, PPC_NONE, PPC2_ALTIVEC_207),
GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207),