aarch64: Add combine patterns for right shift and narrow
This adds a simple pattern for combining right shifts and narrows into
shifted narrows, e.g.

typedef short int16_t;
typedef unsigned short uint16_t;

void foo (uint16_t * restrict a, int16_t * restrict d, int n)
{
  for( int i = 0; i < n; i++ )
    d[i] = (a[i] * a[i]) >> 10;
}

now generates:

.L4:
        ldr     q0, [x0, x3]
        umull   v1.4s, v0.4h, v0.4h
        umull2  v0.4s, v0.8h, v0.8h
        shrn    v1.4h, v1.4s, 10
        shrn2   v1.8h, v0.4s, 10
        str     q1, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

instead of:

.L4:
        ldr     q0, [x0, x3]
        umull   v1.4s, v0.4h, v0.4h
        umull2  v0.4s, v0.8h, v0.8h
        sshr    v1.4s, v1.4s, 10
        sshr    v0.4s, v0.4s, 10
        xtn     v1.4h, v1.4s
        xtn2    v1.8h, v0.4s
        str     q1, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (*aarch64_<srn_op>shrn<mode>_vect,
	*aarch64_<srn_op>shrn<mode>2_vect_le,
	*aarch64_<srn_op>shrn<mode>2_vect_be): New.
	* config/aarch64/iterators.md (srn_op): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/shrn-combine-1.c: New test.
	* gcc.target/aarch64/shrn-combine-2.c: New test.
	* gcc.target/aarch64/shrn-combine-3.c: New test.
	* gcc.target/aarch64/shrn-combine-4.c: New test.
This commit is contained in:
parent
d98626bf45
commit
e33aef11e1
|
@ -1818,6 +1818,40 @@
|
|||
[(set_attr "type" "neon_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
;; Combine pattern: a right shift (any code in the SHIFTRT iterator) of a
;; VQN vector by an immediate vector accepted by
;; aarch64_simd_shift_imm_vec_<vn_mode>, followed by a truncate to the
;; narrow mode <VNARROWQ>, is emitted as a single SHRN.
;; Operand 0 is the narrow result, operand 1 the wide source and operand 2
;; the shift amount.  The same "shrn" mnemonic is printed for every SHIFTRT
;; code; <srn_op> only affects the pattern name.
;; NOTE(review): SHIFTRT is presumably {ashiftrt, lshiftrt} (the codes that
;; srn_op maps) -- confirm against iterators.md.
(define_insn "*aarch64_<srn_op>shrn<mode>_vect"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
||||
(truncate:<VNARROWQ>
|
||||
(SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
|
||||
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
|
||||
"TARGET_SIMD"
|
||||
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
|
||||
[(set_attr "type" "neon_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
;; Little-endian combine pattern for SHRN2: the result is a vec_concat whose
;; first element is operand 1 (an existing narrow vector, tied to the output
;; register through the "0" constraint) and whose second element is the
;; truncated right shift of operand 2 by the immediate vector operand 3.
;; Only enabled when !BYTES_BIG_ENDIAN.
;; The same "shrn2" mnemonic is printed for every SHIFTRT code; <srn_op>
;; only affects the pattern name.
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
(match_operand:<VNARROWQ> 1 "register_operand" "0")
|
||||
(truncate:<VNARROWQ>
|
||||
(SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
|
||||
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
|
||||
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
|
||||
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
|
||||
[(set_attr "type" "neon_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
;; Big-endian combine pattern for SHRN2: same operands and output template
;; as the _le variant, but the vec_concat elements are swapped -- the
;; truncated right shift of operand 2 comes first and operand 1 (tied to the
;; output register through the "0" constraint) comes second.
;; Only enabled when BYTES_BIG_ENDIAN.
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
(truncate:<VNARROWQ>
|
||||
(SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
|
||||
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
|
||||
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
|
||||
"TARGET_SIMD && BYTES_BIG_ENDIAN"
|
||||
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
|
||||
[(set_attr "type" "neon_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
(define_expand "aarch64_shrn<mode>"
|
||||
[(set (match_operand:<VNARROWQ> 0 "register_operand")
|
||||
(truncate:<VNARROWQ>
|
||||
|
|
|
@ -2003,6 +2003,9 @@
|
|||
;; Op prefix for shift right and accumulate:
;; "s" for ashiftrt, "u" for lshiftrt.
|
||||
(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")])
|
||||
|
||||
;; Op prefix for shift right and narrow:
;; "r" for ashiftrt, empty for lshiftrt.
;; In the patterns added by this change the attribute is only spliced into
;; insn names (*aarch64_<srn_op>shrn<mode>...); the printed mnemonic is the
;; literal "shrn"/"shrn2" for both shift codes.
;; NOTE(review): an "r" prefix yields names reading "rshrn", which is also
;; the mnemonic of the rounding narrow shift -- confirm the spelling is
;; intended, since these patterns do not round.
|
||||
(define_code_attr srn_op [(ashiftrt "r") (lshiftrt "")])
|
||||
|
||||
;; Map shift operators onto underlying bit-field instructions:
;; ashift -> ubfiz, ashiftrt -> sbfx, lshiftrt -> ubfx, rotatert -> extr.
|
||||
(define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx")
|
||||
(lshiftrt "ubfx") (rotatert "extr")])
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
|
||||
|
||||
#define TYPE char
|
||||
|
||||
/* Codegen test kernel: for each i in [0, n), square a[i], shift the product
   right by 2 and store it to d[i], narrowing to TYPE (char in this file).
   The dg-final directives in this file expect the loop to compile to
   exactly one shrn and one shrn2, so the source shape must not be changed
   casually.  The restrict qualifiers declare that a and d do not alias.  */
void foo (unsigned TYPE * restrict a, TYPE * restrict d, int n)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
d[i] = (a[i] * a[i]) >> 2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tshrn\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tshrn2\t} 1 } } */
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
|
||||
|
||||
#define TYPE short
|
||||
|
||||
/* Codegen test kernel: for each i in [0, n), square a[i], shift the product
   right by 2 and store it to d[i], narrowing to TYPE (short in this file).
   The dg-final directives in this file expect the loop to compile to
   exactly one shrn and one shrn2, so the source shape must not be changed
   casually.  The restrict qualifiers declare that a and d do not alias.
   NOTE(review): a[i] is promoted to int before the multiply, so for 16-bit
   inputs the product can exceed INT_MAX where int is 32 bits; harmless for
   an assemble-only test, but confirm it is intended.  */
void foo (unsigned TYPE * restrict a, TYPE * restrict d, int n)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
d[i] = (a[i] * a[i]) >> 2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tshrn\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tshrn2\t} 1 } } */
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
|
||||
|
||||
#define TYPE int
|
||||
|
||||
/* Codegen test kernel: for each i in [0, n), shift the unsigned long long
   a[i] right by 3 and store it to d[i], narrowing to TYPE (int in this
   file).  There is no multiply here, unlike the other shrn-combine tests.
   The dg-final directives in this file expect the loop to compile to
   exactly one shrn and one shrn2, so the source shape must not be changed
   casually.  The restrict qualifiers declare that a and d do not alias.  */
void foo (unsigned long long * restrict a, TYPE * restrict d, int n)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
d[i] = a[i] >> 3;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tshrn\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tshrn2\t} 1 } } */
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
|
||||
|
||||
#define TYPE long long
|
||||
|
||||
/* Negative codegen test kernel: for each i in [0, n), square a[i], shift
   the product right by 2 and store it to d[i].  TYPE is long long in this
   file, so source and destination elements have the same width and the
   store does not narrow; the dg-final directives accordingly require that
   neither shrn nor shrn2 appears in the output.
   The restrict qualifiers declare that a and d do not alias.  */
void foo (unsigned TYPE * restrict a, TYPE * restrict d, int n)
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
d[i] = (a[i] * a[i]) >> 2;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tshrn\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tshrn2\t} } } */
|
Loading…
Reference in New Issue