[AArch64][2/3] Recognise rev16 operations on SImode and DImode data

* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
       (rev16<mode>2_alt): Likewise.
       * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
       * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
       (aarch_rev16_shleft_mask_imm_p): Likewise.
       (aarch_rev16_p_1): Likewise.
       (aarch_rev16_p): Likewise.
       * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
       (aarch_rev16_shright_mask_imm_p): Likewise.
       (aarch_rev16_shleft_mask_imm_p): Likewise.

       * gcc.target/aarch64/rev16_1.c: New test.

From-SVN: r209704
This commit is contained in:
Kyrylo Tkachov 2014-04-23 15:26:28 +00:00 committed by Kyrylo Tkachov
parent 9ac05ae590
commit f7d5cf8df3
7 changed files with 194 additions and 0 deletions

View File

@ -1,3 +1,16 @@
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
(rev16<mode>2_alt): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
(aarch_rev16_shleft_mask_imm_p): Likewise.
(aarch_rev16_p_1): Likewise.
(aarch_rev16_p): Likewise.
* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
(aarch_rev16_shright_mask_imm_p): Likewise.
(aarch_rev16_shleft_mask_imm_p): Likewise.
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.

View File

@ -4695,6 +4695,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
return false;
case IOR:
if (aarch_rev16_p (x))
{
*cost = COSTS_N_INSNS (1);
if (speed)
*cost += extra_cost->alu.rev;
return true;
}
/* Fall through. */
case XOR:
case AND:
cost_logic:

View File

@ -3253,6 +3253,38 @@
[(set_attr "type" "rev")]
)
;; There are no canonicalisation rules for the position of the lshiftrt, ashift
;; operations within an IOR/AND RTX, therefore we have two patterns matching
;; each valid permutation.
;; Match the IOR ordering with the left-shift half first:
;;   ((x << 8) & 0xff00ff00...) | ((x >> 8) & 0x00ff00ff...)
;; The mask immediates are validated by the aarch-common.c helpers so
;; that only genuine rev16 bit patterns are accepted for <MODE>mode.
(define_insn "rev16<mode>2"
  [(set (match_operand:GPI 0 "register_operand" "=r")
        (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
                                      (const_int 8))
                          (match_operand:GPI 3 "const_int_operand" "n"))
                 (and:GPI (lshiftrt:GPI (match_dup 1)
                                        (const_int 8))
                          (match_operand:GPI 2 "const_int_operand" "n"))))]
  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
  "rev16\\t%<w>0, %<w>1"
  [(set_attr "type" "rev")]
)
;; Match the opposite IOR ordering, with the right-shift half first:
;;   ((x >> 8) & 0x00ff00ff...) | ((x << 8) & 0xff00ff00...)
;; There is no canonical operand order for this IOR, so a second
;; pattern is needed to catch this permutation.
(define_insn "rev16<mode>2_alt"
  [(set (match_operand:GPI 0 "register_operand" "=r")
        (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
                                        (const_int 8))
                          (match_operand:GPI 2 "const_int_operand" "n"))
                 (and:GPI (ashift:GPI (match_dup 1)
                                      (const_int 8))
                          (match_operand:GPI 3 "const_int_operand" "n"))))]
  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
  "rev16\\t%<w>0, %<w>1"
  [(set_attr "type" "rev")]
)
;; zero_extend version of above
(define_insn "*bswapsi2_uxtw"
[(set (match_operand:DI 0 "register_operand" "=r")

View File

@ -24,6 +24,9 @@
#define GCC_AARCH_COMMON_PROTOS_H
extern int aarch_crypto_can_dual_issue (rtx, rtx);
extern bool aarch_rev16_p (rtx);
extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_early_store_addr_dep (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);

View File

@ -191,6 +191,79 @@ arm_get_set_operands (rtx producer, rtx consumer,
return 0;
}
/* Return true if VAL is a CONST_INT equal to the mask applied to the
   right-shifted half of a rev16 sequence (0x00ff00ff for SImode),
   truncated appropriately for MODE.  */

bool
aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
{
  if (!CONST_INT_P (val))
    return false;

  return INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff, mode);
}
/* Return true if VAL is a CONST_INT equal to the mask applied to the
   left-shifted half of a rev16 sequence (0xff00ff00 for SImode),
   truncated appropriately for MODE.  */

bool
aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
{
  if (!CONST_INT_P (val))
    return false;

  return INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff00, mode);
}
/* Helper for aarch_rev16_p.  Return true if LHS is the shifted-left half
   ((x << 8) & mask) and RHS the shifted-right half ((x >> 8) & mask) of a
   rev16 in MODE, with both halves operating on the same register.  */

static bool
aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
{
  rtx lshift, rshift;

  if (GET_CODE (lhs) != AND || GET_CODE (rhs) != AND)
    return false;

  lshift = XEXP (lhs, 0);
  rshift = XEXP (rhs, 0);

  if (GET_CODE (lshift) != ASHIFT || GET_CODE (rshift) != LSHIFTRT)
    return false;

  /* Each half must shift a register by exactly 8 bits.  */
  if (!CONST_INT_P (XEXP (lshift, 1))
      || INTVAL (XEXP (lshift, 1)) != 8
      || !REG_P (XEXP (lshift, 0)))
    return false;

  if (!REG_P (XEXP (rshift, 0))
      || !CONST_INT_P (XEXP (rshift, 1))
      || INTVAL (XEXP (rshift, 1)) != 8)
    return false;

  /* Both masks must be CONST_INTs and both halves must use the same
     source register.  */
  if (!CONST_INT_P (XEXP (lhs, 1))
      || !CONST_INT_P (XEXP (rhs, 1))
      || REGNO (XEXP (rshift, 0)) != REGNO (XEXP (lshift, 0)))
    return false;

  return aarch_rev16_shright_mask_imm_p (XEXP (rhs, 1), mode)
         && aarch_rev16_shleft_mask_imm_p (XEXP (lhs, 1), mode);
}
/* Recognise a sequence of bitwise operations corresponding to a rev16
   operation.  These will be of the form:
     ((x >> 8) & 0x00ff00ff)
   | ((x << 8) & 0xff00ff00)
   for SImode, and with similar but wider bitmasks for DImode.  The two
   sub-expressions of the IOR can appear on either side, so check both
   permutations with the help of aarch_rev16_p_1 above.  */

bool
aarch_rev16_p (rtx x)
{
  enum machine_mode mode;

  if (GET_CODE (x) != IOR)
    return false;

  mode = GET_MODE (x);

  /* No canonicalisation rules fix the position of the two shifts, so
     try the operands in both orders.  */
  return aarch_rev16_p_1 (XEXP (x, 0), XEXP (x, 1), mode)
         || aarch_rev16_p_1 (XEXP (x, 1), XEXP (x, 0), mode);
}
/* Return nonzero if the CONSUMER instruction (a load) does need
PRODUCER's value to calculate the address. */
int

View File

@ -1,3 +1,7 @@
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/rev16_1.c: New test.
2014-04-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/60903

View File

@ -0,0 +1,59 @@
/* { dg-options "-O2" } */
/* { dg-do run } */
extern void abort (void);
typedef unsigned int __u32;

/* Swap the bytes within each 16-bit halfword of X.  Written with the AND
   applied before the shift; the exact expression shape is deliberate so
   the compiler's rev16 pattern matching is exercised on this operand
   ordering — do not simplify.  */
__u32
__rev16_32_alt (__u32 x)
{
  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
}
/* Swap the bytes within each 16-bit halfword of X, with the IOR operands
   in the opposite order (shift-left half first).  Both orderings must be
   recognised as a rev16, so the expression shape is deliberate — do not
   simplify.  */
__u32
__rev16_32 (__u32 x)
{
  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
}
typedef unsigned long long __u64;

/* DImode variant: swap the bytes within each 16-bit halfword of the
   64-bit value X, with the shift-right half of the IOR first.  The
   expression shape is deliberate so rev16 recognition is exercised on
   this operand ordering — do not simplify.  */
__u64
__rev16_64_alt (__u64 x)
{
  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
}
/* DImode variant with the IOR operands in the opposite order
   (shift-left half first).  The expression shape is deliberate so both
   operand orderings of the rev16 idiom are covered — do not simplify.  */
__u64
__rev16_64 (__u64 x)
{
  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
}
/* Run each rev16 variant on a known input and abort on any mismatch
   against the precomputed halfword-byte-swapped result.  */
int
main (void)
{
  /* volatile keeps the compiler from constant-folding the calls away.  */
  volatile __u32 input32 = 0x12345678;
  volatile __u32 result32 = 0x34127856;
  volatile __u64 input64 = 0x1234567890abcdefUL;
  volatile __u64 result64 = 0x34127856ab90efcdUL;

  if (__rev16_32 (input32) != result32
      || __rev16_32_alt (input32) != result32)
    abort ();

  if (__rev16_64 (input64) != result64
      || __rev16_64_alt (input64) != result64)
    abort ();

  return 0;
}