[AArch64][2/3] Recognise rev16 operations on SImode and DImode data
* config/aarch64/aarch64.md (rev16<mode>2): New pattern. (rev16<mode>2_alt): Likewise. * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. (aarch_rev16_shleft_mask_imm_p): Likewise. (aarch_rev16_p_1): Likewise. (aarch_rev16_p): Likewise. * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. (aarch_rev16_shright_mask_imm_p): Likewise. (aarch_rev16_shleft_mask_imm_p): Likewise. * gcc.target/aarch64/rev16_1.c: New test. From-SVN: r209704
This commit is contained in:
parent
9ac05ae590
commit
f7d5cf8df3
|
@ -1,3 +1,16 @@
|
||||||
|
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||||
|
|
||||||
|
* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
|
||||||
|
(rev16<mode>2_alt): Likewise.
|
||||||
|
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
|
||||||
|
* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
|
||||||
|
(aarch_rev16_shleft_mask_imm_p): Likewise.
|
||||||
|
(aarch_rev16_p_1): Likewise.
|
||||||
|
(aarch_rev16_p): Likewise.
|
||||||
|
* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
|
||||||
|
(aarch_rev16_shright_mask_imm_p): Likewise.
|
||||||
|
(aarch_rev16_shleft_mask_imm_p): Likewise.
|
||||||
|
|
||||||
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||||
|
|
||||||
* config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.
|
* config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.
|
||||||
|
|
|
@ -4695,6 +4695,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
case IOR:
|
case IOR:
|
||||||
|
if (aarch_rev16_p (x))
|
||||||
|
{
|
||||||
|
*cost = COSTS_N_INSNS (1);
|
||||||
|
|
||||||
|
if (speed)
|
||||||
|
*cost += extra_cost->alu.rev;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/* Fall through. */
|
||||||
case XOR:
|
case XOR:
|
||||||
case AND:
|
case AND:
|
||||||
cost_logic:
|
cost_logic:
|
||||||
|
|
|
@ -3253,6 +3253,38 @@
|
||||||
[(set_attr "type" "rev")]
|
[(set_attr "type" "rev")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;; There are no canonicalisation rules for the position of the lshiftrt, ashift
|
||||||
|
;; operations within an IOR/AND RTX, therefore we have two patterns matching
|
||||||
|
;; each valid permutation.
|
||||||
|
|
||||||
|
;; rev16 for the GPI modes, ASHIFT-first form.  Matches
;;   (ior (and (ashift X 8) OP3) (and (lshiftrt X 8) OP2))
;; where X is register operand 1 and OP2/OP3 are constant integers.  The
;; insn condition only accepts the pattern when OP3 passes
;; aarch_rev16_shleft_mask_imm_p and OP2 passes
;; aarch_rev16_shright_mask_imm_p for the mode being matched.
(define_insn "rev16<mode>2"
|
||||||
|
[(set (match_operand:GPI 0 "register_operand" "=r")
|
||||||
|
(ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
|
||||||
|
(const_int 8))
|
||||||
|
(match_operand:GPI 3 "const_int_operand" "n"))
|
||||||
|
(and:GPI (lshiftrt:GPI (match_dup 1)
|
||||||
|
(const_int 8))
|
||||||
|
(match_operand:GPI 2 "const_int_operand" "n"))))]
|
||||||
|
"aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
|
||||||
|
&& aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
|
||||||
|
"rev16\\t%<w>0, %<w>1"
|
||||||
|
[(set_attr "type" "rev")]
|
||||||
|
)
|
||||||
|
|
||||||
|
;; rev16 for the GPI modes, LSHIFTRT-first form.  Matches
;;   (ior (and (lshiftrt X 8) OP2) (and (ashift X 8) OP3))
;; i.e. the same two masked shifts of register operand 1 with the IOR
;; operands in the opposite order.  The insn condition applies the same
;; mask predicates: OP3 must pass aarch_rev16_shleft_mask_imm_p and OP2
;; must pass aarch_rev16_shright_mask_imm_p.
(define_insn "rev16<mode>2_alt"
|
||||||
|
[(set (match_operand:GPI 0 "register_operand" "=r")
|
||||||
|
(ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
|
||||||
|
(const_int 8))
|
||||||
|
(match_operand:GPI 2 "const_int_operand" "n"))
|
||||||
|
(and:GPI (ashift:GPI (match_dup 1)
|
||||||
|
(const_int 8))
|
||||||
|
(match_operand:GPI 3 "const_int_operand" "n"))))]
|
||||||
|
"aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
|
||||||
|
&& aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
|
||||||
|
"rev16\\t%<w>0, %<w>1"
|
||||||
|
[(set_attr "type" "rev")]
|
||||||
|
)
|
||||||
|
|
||||||
;; zero_extend version of above
|
;; zero_extend version of above
|
||||||
(define_insn "*bswapsi2_uxtw"
|
(define_insn "*bswapsi2_uxtw"
|
||||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||||
|
|
|
@ -24,6 +24,9 @@
|
||||||
#define GCC_AARCH_COMMON_PROTOS_H
|
#define GCC_AARCH_COMMON_PROTOS_H
|
||||||
|
|
||||||
extern int aarch_crypto_can_dual_issue (rtx, rtx);
|
extern int aarch_crypto_can_dual_issue (rtx, rtx);
|
||||||
|
extern bool aarch_rev16_p (rtx);
|
||||||
|
extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
|
||||||
|
extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
|
||||||
extern int arm_early_load_addr_dep (rtx, rtx);
|
extern int arm_early_load_addr_dep (rtx, rtx);
|
||||||
extern int arm_early_store_addr_dep (rtx, rtx);
|
extern int arm_early_store_addr_dep (rtx, rtx);
|
||||||
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
|
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
|
||||||
|
|
|
@ -191,6 +191,79 @@ arm_get_set_operands (rtx producer, rtx consumer,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
|
||||||
|
{
|
||||||
|
return CONST_INT_P (val)
|
||||||
|
&& INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
|
||||||
|
{
|
||||||
|
return CONST_INT_P (val)
|
||||||
|
&& INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff00, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool
|
||||||
|
aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
|
||||||
|
{
|
||||||
|
if (GET_CODE (lhs) == AND
|
||||||
|
&& GET_CODE (XEXP (lhs, 0)) == ASHIFT
|
||||||
|
&& CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
|
||||||
|
&& INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
|
||||||
|
&& REG_P (XEXP (XEXP (lhs, 0), 0))
|
||||||
|
&& CONST_INT_P (XEXP (lhs, 1))
|
||||||
|
&& GET_CODE (rhs) == AND
|
||||||
|
&& GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
|
||||||
|
&& REG_P (XEXP (XEXP (rhs, 0), 0))
|
||||||
|
&& CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
|
||||||
|
&& INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
|
||||||
|
&& CONST_INT_P (XEXP (rhs, 1))
|
||||||
|
&& REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
|
||||||
|
|
||||||
|
{
|
||||||
|
rtx lhs_mask = XEXP (lhs, 1);
|
||||||
|
rtx rhs_mask = XEXP (rhs, 1);
|
||||||
|
|
||||||
|
return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
|
||||||
|
&& aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
|
||||||
|
These will be of the form:
|
||||||
|
((x >> 8) & 0x00ff00ff)
|
||||||
|
| ((x << 8) & 0xff00ff00)
|
||||||
|
for SImode and with similar but wider bitmasks for DImode.
|
||||||
|
The two sub-expressions of the IOR can appear on either side so check both
|
||||||
|
permutations with the help of aarch_rev16_p_1 above. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
aarch_rev16_p (rtx x)
|
||||||
|
{
|
||||||
|
rtx left_sub_rtx, right_sub_rtx;
|
||||||
|
bool is_rev = false;
|
||||||
|
|
||||||
|
if (GET_CODE (x) != IOR)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
left_sub_rtx = XEXP (x, 0);
|
||||||
|
right_sub_rtx = XEXP (x, 1);
|
||||||
|
|
||||||
|
/* There are no canonicalisation rules for the position of the two shifts
|
||||||
|
involved in a rev, so try both permutations. */
|
||||||
|
is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
|
||||||
|
|
||||||
|
if (!is_rev)
|
||||||
|
is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
|
||||||
|
|
||||||
|
return is_rev;
|
||||||
|
}
|
||||||
|
|
||||||
/* Return nonzero if the CONSUMER instruction (a load) does need
|
/* Return nonzero if the CONSUMER instruction (a load) does need
|
||||||
PRODUCER's value to calculate the address. */
|
PRODUCER's value to calculate the address. */
|
||||||
int
|
int
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||||
|
|
||||||
|
* gcc.target/aarch64/rev16_1.c: New test.
|
||||||
|
|
||||||
2014-04-23 Richard Biener <rguenther@suse.de>
|
2014-04-23 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR tree-optimization/60903
|
PR tree-optimization/60903
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/* { dg-options "-O2" } */
|
||||||
|
/* { dg-do run } */
|
||||||
|
|
||||||
|
extern void abort (void);
|
||||||
|
|
||||||
|
typedef unsigned int __u32;

/* Swap the two bytes inside each 16-bit half of X.  The down-shifted
   term is the first operand of the OR.  */
__u32
__rev16_32_alt (__u32 x)
{
  const __u32 upper_bytes = (__u32)0xff00ff00UL;
  const __u32 lower_bytes = (__u32)0x00ff00ffUL;
  __u32 moved_down = ((__u32)(x) & upper_bytes) >> 8;
  __u32 moved_up = ((__u32)(x) & lower_bytes) << 8;

  return moved_down | moved_up;
}
|
||||||
|
|
||||||
|
__u32
|
||||||
|
__rev16_32 (__u32 x)
|
||||||
|
{
|
||||||
|
return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
|
||||||
|
| (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef unsigned long long __u64;

/* Swap the two bytes inside each 16-bit quarter of X.  The down-shifted
   term is the first operand of the OR.  */
__u64
__rev16_64_alt (__u64 x)
{
  const __u64 upper_bytes = (__u64)0xff00ff00ff00ff00UL;
  const __u64 lower_bytes = (__u64)0x00ff00ff00ff00ffUL;
  __u64 moved_down = ((__u64)(x) & upper_bytes) >> 8;
  __u64 moved_up = ((__u64)(x) & lower_bytes) << 8;

  return moved_down | moved_up;
}
|
||||||
|
|
||||||
|
__u64
|
||||||
|
__rev16_64 (__u64 x)
|
||||||
|
{
|
||||||
|
return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
|
||||||
|
| (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
volatile __u32 in32 = 0x12345678;
|
||||||
|
volatile __u32 expected32 = 0x34127856;
|
||||||
|
volatile __u64 in64 = 0x1234567890abcdefUL;
|
||||||
|
volatile __u64 expected64 = 0x34127856ab90efcdUL;
|
||||||
|
|
||||||
|
if (__rev16_32 (in32) != expected32)
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
if (__rev16_32_alt (in32) != expected32)
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
if (__rev16_64 (in64) != expected64)
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
if (__rev16_64_alt (in64) != expected64)
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue