diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5072a03e56b..21d7c1c8498 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,24 @@
+2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+	* config/arm/crypto.md:
+	(crypto_<crypto_pattern>): Redefine aese/aesd pattern with xor.
+	(crypto_<crypto_pattern>): Remove attribute enabled for aesmc.
+	(crypto_<crypto_pattern>): Split CRYPTO_BINARY into 2 patterns.
+	(*aarch32_crypto_aese_fused, *aarch32_crypto_aesd_fused): New.
+	* config/arm/arm.c
+	(aarch_macro_fusion_pair_p): Remove aes/aesmc fusion check.
+	* config/arm/aarch-common-protos.h
+	(aarch_crypto_can_dual_issue): Remove.
+	* config/arm/aarch-common.c
+	(aarch_crypto_can_dual_issue): Likewise.
+	* config/arm/exynos-m1.md: Remove aese/aesmc fusion.
+	* config/arm/cortex-a53.md: Likewise.
+	* config/arm/cortex-a57.md: Likewise.
+	* config/arm/iterators.md:
+	(CRYPTO_BINARY): Redefine.
+	(CRYPTO_UNARY): Removed.
+	(CRYPTO_AES, CRYPTO_AESMC): New.
+
 2019-07-09  Richard Biener  <rguenther@suse.de>
 
 	* tree-ssa-sccvn.c (struct vn_walk_cb_data): Add orig_ref member.
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 11cd5145bbc..3bf38a104f6 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -24,7 +24,6 @@
 #define GCC_AARCH_COMMON_PROTOS_H
 
 extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
-extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
 extern bool aarch_rev16_p (rtx);
 extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
 extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index c7af12d4cd1..965a07a43e3 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -31,46 +31,6 @@
 #include "rtl-iter.h"
 #include "memmodel.h"
 
-/* In ARMv8-A there's a general expectation that AESE/AESMC
-   and AESD/AESIMC sequences of the form:
-
-    AESE Vn, _
-    AESMC Vn, Vn
-
-   will issue both instructions in a single cycle on super-scalar
-   implementations.  This function identifies such pairs.  */
-
-int
-aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
-{
-  rtx producer_set, consumer_set;
-  rtx producer_src, consumer_src;
-
-  producer_set = single_set (producer_insn);
-  consumer_set = single_set (consumer_insn);
-
-  producer_src = producer_set ? SET_SRC (producer_set) : NULL;
-  consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL;
-
-  if (producer_src && consumer_src
-      && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
-      && ((XINT (producer_src, 1) == UNSPEC_AESE
-           && XINT (consumer_src, 1) == UNSPEC_AESMC)
-          || (XINT (producer_src, 1) == UNSPEC_AESD
-              && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
-    {
-      unsigned int regno = REGNO (SET_DEST (producer_set));
-
-      /* Before reload the registers are virtual, so the destination of
-	 consumer_set doesn't need to match.  */
-
-      return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
-	     && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
-    }
-
-  return 0;
-}
-
 /* Return TRUE if X is either an arithmetic shift left, or
    is a multiplication by a power of two.  */
 bool
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 820502ac8a0..f42a7b1d4ed 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -30606,10 +30606,6 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
   if (!arm_macro_fusion_p ())
     return false;
 
-  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
-      && aarch_crypto_can_dual_issue (prev, curr))
-    return true;
-
   if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
       && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
     return true;
diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md
index a5f0b10d142..2d96a9cdd5a 100644
--- a/gcc/config/arm/cortex-a57.md
+++ b/gcc/config/arm/cortex-a57.md
@@ -801,9 +801,3 @@
 ;; help.
 (define_bypass 1 "cortex_a57_*"
 	       "cortex_a57_call,cortex_a57_branch")
-
-;; AESE+AESMC and AESD+AESIMC pairs forward with zero latency
-(define_bypass 0 "cortex_a57_crypto_simple"
-	       "cortex_a57_crypto_simple"
-	       "aarch_crypto_can_dual_issue")
-
diff --git a/gcc/config/arm/crypto.md b/gcc/config/arm/crypto.md
index 63d9d9ffa42..bf34f69fc75 100644
--- a/gcc/config/arm/crypto.md
+++ b/gcc/config/arm/crypto.md
@@ -19,33 +19,76 @@
 ;; <http://www.gnu.org/licenses/>.
 
-;; When AES/AESMC fusion is enabled we want the register allocation to
-;; look like:
-;;    AESE Vn, _
-;;    AESMC Vn, Vn
-;; So prefer to tie operand 1 to operand 0 when fusing.
-
 (define_insn "crypto_<crypto_pattern>"
-  [(set (match_operand:<crypto_mode> 0 "register_operand" "=w,w")
-        (unspec:<crypto_mode> [(match_operand:<crypto_mode> 1
-                       "register_operand" "0,w")]
-         CRYPTO_UNARY))]
+  [(set (match_operand:<crypto_mode> 0 "register_operand" "=w")
+	(unspec:<crypto_mode>
+	 [(match_operand:<crypto_mode> 1 "register_operand" "w")]
+	 CRYPTO_AESMC))]
   "TARGET_CRYPTO"
   "<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q1"
-  [(set_attr "type" "<crypto_type>")
-   (set_attr_alternative "enabled"
-     [(if_then_else (match_test
-		       "arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)")
-		     (const_string "yes" )
-		     (const_string "no"))
-      (const_string "yes")])]
+  [(set_attr "type" "<crypto_type>")]
+)
+
+(define_insn "crypto_<crypto_pattern>"
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+	(unspec:V16QI
+	 [(xor:V16QI
+	   (match_operand:V16QI 1 "register_operand" "%0")
+	   (match_operand:V16QI 2 "register_operand" "w"))]
+	 CRYPTO_AES))]
+  "TARGET_CRYPTO"
+  "<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q2"
+  [(set_attr "type" "<crypto_type>")]
+)
+
+;; When AESE/AESMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves in between.
+;; Mash the two together during combine.
+
+(define_insn "*aarch32_crypto_aese_fused"
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+	(unspec:V16QI
+	 [(unspec:V16QI
+	   [(xor:V16QI
+	     (match_operand:V16QI 1 "register_operand" "%0")
+	     (match_operand:V16QI 2 "register_operand" "w"))]
+	   UNSPEC_AESE)]
+	 UNSPEC_AESMC))]
+  "TARGET_CRYPTO
+   && arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)"
+  "aese.8\\t%q0, %q2\;aesmc.8\\t%q0, %q0"
+  [(set_attr "type" "crypto_aese")
+   (set_attr "length" "8")]
+)
+
+;; When AESD/AESIMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves in between.
+;; Mash the two together during combine.
+
+(define_insn "*aarch32_crypto_aesd_fused"
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+	(unspec:V16QI
+	 [(unspec:V16QI
+	   [(xor:V16QI
+	     (match_operand:V16QI 1 "register_operand" "%0")
+	     (match_operand:V16QI 2 "register_operand" "w"))]
+	   UNSPEC_AESD)]
+	 UNSPEC_AESIMC))]
+  "TARGET_CRYPTO
+   && arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)"
+  "aesd.8\\t%q0, %q2\;aesimc.8\\t%q0, %q0"
+  [(set_attr "type" "crypto_aese")
+   (set_attr "length" "8")]
 )
 
 (define_insn "crypto_<crypto_pattern>"
   [(set (match_operand:<crypto_mode> 0 "register_operand" "=w")
-        (unspec:<crypto_mode> [(match_operand:<crypto_mode> 1 "register_operand" "0")
-                      (match_operand:<crypto_mode> 2 "register_operand" "w")]
-         CRYPTO_BINARY))]
+	(unspec:<crypto_mode>
+	 [(match_operand:<crypto_mode> 1 "register_operand" "0")
+	  (match_operand:<crypto_mode> 2 "register_operand" "w")]
+	 CRYPTO_BINARY))]
   "TARGET_CRYPTO"
   "<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q2"
   [(set_attr "type" "<crypto_type>")]
diff --git a/gcc/config/arm/exynos-m1.md b/gcc/config/arm/exynos-m1.md
index 3d04a52ac3d..150ac85ebc7 100644
--- a/gcc/config/arm/exynos-m1.md
+++ b/gcc/config/arm/exynos-m1.md
@@ -950,11 +950,6 @@
 		   "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
 		    exynos_m1_crypto_poly*")
 
-;; AES{D,E}/AESMC pairs can feed each other instantly.
-(define_bypass 0 "exynos_m1_crypto_simple"
-	       "exynos_m1_crypto_simple"
-	       "aarch_crypto_can_dual_issue")
-
 ;; Predicted branches take no time, but mispredicted ones take forever anyway.
 (define_bypass 1 "exynos_m1_*"
 	       "exynos_m1_call, exynos_m1_branch")
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 2462b8c87ea..e03a7202417 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -413,10 +413,11 @@
 (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
                           UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW])
 
-(define_int_iterator CRYPTO_UNARY [UNSPEC_AESMC UNSPEC_AESIMC])
+(define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC])
 
-(define_int_iterator CRYPTO_BINARY [UNSPEC_AESD UNSPEC_AESE
-                                    UNSPEC_SHA1SU1 UNSPEC_SHA256SU0])
+(define_int_iterator CRYPTO_AES [UNSPEC_AESD UNSPEC_AESE])
+
+(define_int_iterator CRYPTO_BINARY [UNSPEC_SHA1SU1 UNSPEC_SHA256SU0])
 
 (define_int_iterator CRYPTO_TERNARY [UNSPEC_SHA1SU0 UNSPEC_SHA256H
                                      UNSPEC_SHA256H2 UNSPEC_SHA256SU1])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ce78f7781f5..995076b34d1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+	* gcc.target/arm/aes-fuse-1.c: New.
+	* gcc.target/arm/aes-fuse-2.c: New.
+	* gcc.target/arm/aes_xor_combine.c: New.
+
 2019-07-09  Martin Liska  <mliska@suse.cz>
 
 	* gcc.dg/predict-17.c: Test loop optimizer assumption
diff --git a/gcc/testsuite/gcc.target/arm/aes-fuse-1.c b/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
new file mode 100644
index 00000000000..27b08aeef7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/aes-fuse-1.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-mcpu=cortex-a72 -O3 -dp" } */
+
+#include <arm_neon.h>
+
+#define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
+#define AESMC(r, i) (r = vaesmcq_u8 (i))
+
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
+
+void foo (void)
+{
+  AESE (a, a, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (b, b, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (c, c, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESE (d, d, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  x = x ^ k;
+  AESE (x, x, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  y = y ^ k;
+  AESE (y, y, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (d, d);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (c, c);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (y, y);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (x, x);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
diff --git a/gcc/testsuite/gcc.target/arm/aes-fuse-2.c b/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
new file mode 100644
index 00000000000..1266a287531
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/aes-fuse-2.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-mcpu=cortex-a72 -O3 -dp" } */
+
+#include <arm_neon.h>
+
+#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key)));
+#define AESIMC(r, i) (r = vaesimcq_u8 (i))
+
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
+
+void foo (void)
+{
+  AESD (a, a, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESD (b, b, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESD (c, c, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESD (d, d, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  x = x ^ k;
+  AESD (x, x, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  y = y ^ k;
+  AESD (y, y, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESIMC (d, d);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (c, c);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESIMC (y, y);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (x, x);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
diff --git a/gcc/testsuite/gcc.target/arm/aes_xor_combine.c b/gcc/testsuite/gcc.target/arm/aes_xor_combine.c
new file mode 100644
index 00000000000..17ae1c53e4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/aes_xor_combine.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
+
+#include <arm_neon.h>
+
+#define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
+#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key)));
+
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+uint8x16_t foo_key_0 (uint8x16_t dummy, uint8x16_t foo, uint8x16_t bar)
+{
+  dummy = dummy ^ foo;
+  AESE(dummy, dummy, zero);
+  dummy = dummy ^ bar;
+  AESE(dummy, dummy, zero);
+
+  dummy = dummy ^ foo;
+  AESD(dummy, dummy, zero);
+  dummy = dummy ^ bar;
+  AESD(dummy, dummy, zero);
+
+  return dummy;
+}
+
+uint8x16_t foo_data_0 (uint8x16_t dummy, uint8x16_t foo, uint8x16_t bar)
+{
+  dummy = dummy ^ foo;
+  AESE(dummy, zero, dummy);
+  dummy = dummy ^ bar;
+  AESE(dummy, zero, dummy);
+
+  dummy = dummy ^ foo;
+  AESD(dummy, zero, dummy);
+  dummy = dummy ^ bar;
+  AESD(dummy, zero, dummy);
+
+  return dummy;
+}
+
+/* { dg-final { scan-assembler-not "veor" } } */
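
For reference, a minimal sketch of what the new patterns enable on a single
round step. The function name and the exact flags in the comment are
illustrative only, not taken from the patch; the testcases above exercise the
same idioms.

/* Sketch only.  Assumes an arm target with the crypto extension enabled
   (e.g. the arm_crypto dg options above, or -O3 with a crypto-capable
   -mfpu/-march) and a CPU tuned for AES fusion such as cortex-a72.  */
#include <arm_neon.h>

uint8x16_t round_sketch (uint8x16_t data, uint8x16_t key)
{
  /* AESE begins with AddRoundKey, an eor of state and round key, so
     xor-ing the key in first and passing a zero round key is equivalent.
     Because crypto_<crypto_pattern> for CRYPTO_AES now wraps an (xor ...)
     inside the unspec, combine can fold this veor into the aese itself;
     aes_xor_combine.c checks exactly this with scan-assembler-not "veor".  */
  uint8x16_t state = veorq_u8 (data, key);
  state = vaeseq_u8 (state, vdupq_n_u8 (0));

  /* The aese result feeds aesmc directly.  With FUSE_AES_AESMC tuning,
     combine matches the pair as *aarch32_crypto_aese_fused and emits the
     two instructions back to back, e.g.:
	 aese.8   q0, q1
	 aesmc.8  q0, q0  */
  return vaesmcq_u8 (state);
}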