aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions
Model the zero-high-half semantics of the narrowing arithmetic Neon
instructions in the aarch64_<sur><addsub>hn<mode> RTL pattern.  Modeling
these semantics allows for better RTL combinations while also removing
some register allocation issues, as the compiler now knows that the
operation is totally destructive.

Add new tests to narrow_zero_high_half.c to verify the benefit of this
change.

gcc/ChangeLog:

2021-06-14  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_<sur><addsub>hn<mode>):
	Change to an expander that emits the correct instruction depending
	on endianness.
	(aarch64_<sur><addsub>hn<mode>_insn_le): Define.
	(aarch64_<sur><addsub>hn<mode>_insn_be): Define.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.
This commit is contained in:
parent
d0889b5d37
commit
dbfc149b63
|
@ -4661,16 +4661,53 @@
|
|||
|
||||
;; <r><addsub>hn<q>.
|
||||
|
||||
(define_insn "aarch64_<sur><addsub>hn<mode>"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
||||
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
|
||||
(match_operand:VQN 2 "register_operand" "w")]
|
||||
ADDSUBHN))]
|
||||
"TARGET_SIMD"
|
||||
;; Little-endian form of the narrowing arithmetic (r)add/sub-halve-narrow
;; insn: the narrow result occupies the low half of the 128-bit
;; destination, and operand 3 -- constrained to constant zero by the
;; aarch64_simd_or_scalar_imm_zero predicate -- fills the high half.
;; Making the zeroed high half explicit in the RTL tells the compiler the
;; whole destination register is written (the operation is fully
;; destructive), enabling better combination and register allocation.
(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
|
||||
(match_operand:VQN 2 "register_operand" "w")]
|
||||
ADDSUBHN)
|
||||
;; Zero high half -- little-endian, so it is the second vec_concat element.
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
|
||||
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
||||
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
|
||||
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
|
||||
)
|
||||
|
||||
;; Big-endian counterpart of the zero-high-half narrowing insn: in
;; big-endian lane ordering the zero half (operand 3) is the FIRST
;; element of the vec_concat, so a separate pattern from the _insn_le
;; variant is required.  Emits the same assembly template.
(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
;; Zero high half first -- big-endian vec_concat element order.
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
|
||||
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
|
||||
(match_operand:VQN 2 "register_operand" "w")]
|
||||
ADDSUBHN)))]
|
||||
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
||||
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
|
||||
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
|
||||
)
|
||||
|
||||
;; User-visible expander for the narrowing arithmetic intrinsics
;; (vaddhn/vraddhn/vsubhn/vrsubhn).  Emits the endianness-appropriate
;; zero-high-half insn into a full-width temporary, then hands the
;; caller the narrow low half as a subreg.
(define_expand "aarch64_<sur><addsub>hn<mode>"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand")
|
||||
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
|
||||
(match_operand:VQN 2 "register_operand")]
|
||||
ADDSUBHN))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
/* Full-width (VNARROWQ2) temporary: the insn patterns write the whole
   128-bit register, narrow result plus explicit zero high half.  */
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
|
||||
operands[2], CONST0_RTX (<VNARROWQ>mode)));
|
||||
else
|
||||
emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
|
||||
operands[2], CONST0_RTX (<VNARROWQ>mode)));
|
|
||||
/* The intrinsic expects a narrow result, so emit a subreg that will get
|
||||
optimized away as appropriate. */
|
||||
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
|
||||
<VNARROWQ2>mode));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
|
|
|
@ -74,6 +74,42 @@ TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8)
|
|||
TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16)
|
||||
TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
|
||||
|
||||
/* TEST_ARITH: emit a function that applies the two-operand narrowing
   intrinsic NAME (element suffix FS) and combines the narrow result with
   a zero vector of suffix RS.  With zero-high-half semantics modeled in
   the RTL, the vcombine/vdup_n(0) pair should fold into the narrowing
   instruction itself, leaving no "dup" in the generated assembly.  */
#define TEST_ARITH(name, rettype, intype, fs, rs) \
|
||||
rettype test_ ## name ## _ ## fs ## _zero_high \
|
||||
(intype a, intype b) \
|
||||
{ \
|
||||
return vcombine_ ## rs (name ## _ ## fs (a, b), \
|
||||
vdup_n_ ## rs (0)); \
|
||||
}
|
||||
|
||||
/* Instantiate the zero-high-half test for each narrowing arithmetic
   intrinsic (add/sub, with and without rounding) at each element
   width: 16->8, 32->16 and 64->32 bits, signed and unsigned.  */
TEST_ARITH (vaddhn, int8x16_t, int16x8_t, s16, s8)
|
||||
TEST_ARITH (vaddhn, int16x8_t, int32x4_t, s32, s16)
|
||||
TEST_ARITH (vaddhn, int32x4_t, int64x2_t, s64, s32)
|
||||
TEST_ARITH (vaddhn, uint8x16_t, uint16x8_t, u16, u8)
|
||||
TEST_ARITH (vaddhn, uint16x8_t, uint32x4_t, u32, u16)
|
||||
TEST_ARITH (vaddhn, uint32x4_t, uint64x2_t, u64, u32)
|
||||
|
||||
TEST_ARITH (vraddhn, int8x16_t, int16x8_t, s16, s8)
|
||||
TEST_ARITH (vraddhn, int16x8_t, int32x4_t, s32, s16)
|
||||
TEST_ARITH (vraddhn, int32x4_t, int64x2_t, s64, s32)
|
||||
TEST_ARITH (vraddhn, uint8x16_t, uint16x8_t, u16, u8)
|
||||
TEST_ARITH (vraddhn, uint16x8_t, uint32x4_t, u32, u16)
|
||||
TEST_ARITH (vraddhn, uint32x4_t, uint64x2_t, u64, u32)
|
||||
|
||||
TEST_ARITH (vsubhn, int8x16_t, int16x8_t, s16, s8)
|
||||
TEST_ARITH (vsubhn, int16x8_t, int32x4_t, s32, s16)
|
||||
TEST_ARITH (vsubhn, int32x4_t, int64x2_t, s64, s32)
|
||||
TEST_ARITH (vsubhn, uint8x16_t, uint16x8_t, u16, u8)
|
||||
TEST_ARITH (vsubhn, uint16x8_t, uint32x4_t, u32, u16)
|
||||
TEST_ARITH (vsubhn, uint32x4_t, uint64x2_t, u64, u32)
|
||||
|
||||
TEST_ARITH (vrsubhn, int8x16_t, int16x8_t, s16, s8)
|
||||
TEST_ARITH (vrsubhn, int16x8_t, int32x4_t, s32, s16)
|
||||
TEST_ARITH (vrsubhn, int32x4_t, int64x2_t, s64, s32)
|
||||
TEST_ARITH (vrsubhn, uint8x16_t, uint16x8_t, u16, u8)
|
||||
TEST_ARITH (vrsubhn, uint16x8_t, uint32x4_t, u32, u16)
|
||||
TEST_ARITH (vrsubhn, uint32x4_t, uint64x2_t, u64, u32)
|
||||
|
||||
/* Key check: the vdup_n(0) high halves must be absorbed by the
   zero-high-half insn patterns, so no dup may remain.  */
/* { dg-final { scan-assembler-not "dup\\t" } } */
|
||||
|
||||
/* Instruction counts for the narrowing tests in this file.  */
/* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */
|
||||
|
@ -88,3 +124,7 @@ TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
|
|||
/* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} } */
|
||||
/* Six occurrences per narrowing-arithmetic mnemonic, matching the six
   TEST_ARITH instantiations of each intrinsic above.  */
/* { dg-final { scan-assembler-times "\\taddhn\\tv" 6} } */
|
||||
/* { dg-final { scan-assembler-times "\\tsubhn\\tv" 6} } */
|
||||
/* { dg-final { scan-assembler-times "\\trsubhn\\tv" 6} } */
|
||||
/* { dg-final { scan-assembler-times "\\traddhn\\tv" 6} } */
|
||||
|
|
Loading…
Reference in New Issue