aarch64: Add combine patterns for right shift and narrow

This adds simple combine patterns that fold a vector right shift followed
by a narrowing truncation into a single shift-right-narrow (SHRN/SHRN2)
instruction.

i.e.

typedef short int16_t;
typedef unsigned short uint16_t;

void foo (uint16_t * restrict a, int16_t * restrict d, int n)
{
    for( int i = 0; i < n; i++ )
      d[i] = (a[i] * a[i]) >> 10;
}

now generates:

.L4:
        ldr     q0, [x0, x3]
        umull   v1.4s, v0.4h, v0.4h
        umull2  v0.4s, v0.8h, v0.8h
        shrn    v1.4h, v1.4s, 10
        shrn2   v1.8h, v0.4s, 10
        str     q1, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

instead of:

.L4:
        ldr     q0, [x0, x3]
        umull   v1.4s, v0.4h, v0.4h
        umull2  v0.4s, v0.8h, v0.8h
        sshr    v1.4s, v1.4s, 10
        sshr    v0.4s, v0.4s, 10
        xtn     v1.4h, v1.4s
        xtn2    v1.8h, v0.4s
        str     q1, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (*aarch64_<srn_op>shrn<mode>_vect,
	*aarch64_<srn_op>shrn<mode>2_vect_le,
	*aarch64_<srn_op>shrn<mode>2_vect_be): New.
	* config/aarch64/iterators.md (srn_op): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/shrn-combine-1.c: New test.
	* gcc.target/aarch64/shrn-combine-2.c: New test.
	* gcc.target/aarch64/shrn-combine-3.c: New test.
	* gcc.target/aarch64/shrn-combine-4.c: New test.
This commit is contained in:
Tamar Christina 2021-10-20 17:06:31 +01:00
parent d98626bf45
commit e33aef11e1
6 changed files with 89 additions and 0 deletions

View File

@ -1818,6 +1818,40 @@
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
;; Combine pattern: a right shift (any code in the SHIFTRT iterator) of a
;; VQN vector by an immediate vector, followed by a truncate to the
;; <VNARROWQ> mode, is emitted as a single SHRN.
;; Operand 0 is the narrowed result, operand 1 the wide source register and
;; operand 2 the shift amount, which must satisfy
;; aarch64_simd_shift_imm_vec_<vn_mode>.
;; <srn_op> appears only in the insn name; the output template is "shrn"
;; whichever shift code matched.
;; NOTE(review): presumably this is safe because the predicate bounds the
;; shift so that the bits kept by the truncate do not depend on sign vs.
;; zero fill -- confirm against the predicate's definition.
(define_insn "*aarch64_<srn_op>shrn<mode>_vect"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
"TARGET_SIMD"
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
;; Little-endian SHRN2 form of the shift-and-narrow combine pattern.
;; Matches a vec_concat whose first element is operand 1 (tied to the
;; output register by the "0" constraint) and whose second element is the
;; truncated right shift of operand 2 by the immediate vector operand 3.
;; Enabled only when !BYTES_BIG_ENDIAN; the _be pattern matches the same
;; operation with the vec_concat elements in the opposite order.
;; <srn_op> appears only in the insn name; "shrn2" is emitted for every
;; shift code in SHIFTRT.
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
;; Big-endian SHRN2 form of the shift-and-narrow combine pattern.
;; Matches a vec_concat whose first element is the truncated right shift of
;; operand 2 by the immediate vector operand 3 and whose second element is
;; operand 1 (tied to the output register by the "0" constraint).
;; Enabled only when BYTES_BIG_ENDIAN; the _le pattern matches the same
;; operation with the vec_concat elements in the opposite order.
;; The output template is the same as the _le pattern's.
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_expand "aarch64_shrn<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(truncate:<VNARROWQ>

View File

@ -2003,6 +2003,9 @@
;; Op prefix for shift right and accumulate: "s" for ashiftrt and "u" for
;; lshiftrt.
(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")])
;; Op prefix for shift right and narrow: "r" for ashiftrt and the empty
;; string for lshiftrt.
;; NOTE(review): in the *aarch64_<srn_op>shrn<mode>* patterns this attribute
;; is spliced into the insn name only (their output templates do not use
;; it), so it appears to exist to keep the per-code pattern names distinct
;; -- confirm there are no other users.
(define_code_attr srn_op [(ashiftrt "r") (lshiftrt "")])
;; Map shift operators onto underlying bit-field instructions:
;; ashift -> ubfiz, ashiftrt -> sbfx, lshiftrt -> ubfx, rotatert -> extr.
(define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx")
(lshiftrt "ubfx") (rotatert "extr")])

View File

@ -0,0 +1,13 @@
/* { dg-do assemble } */
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
#define TYPE char
/* Square each unsigned char element of A, shift the product right by 2 and
   store it into the char-sized element of D.  The product is computed after
   the integer promotions, so the store narrows the shifted value; this is
   the shift-then-narrow shape that the scan-assembler directives in this
   file expect to be emitted as exactly one shrn and one shrn2.  */
void foo (unsigned TYPE * restrict a, TYPE * restrict d, int n)
{
for( int i = 0; i < n; i++ )
d[i] = (a[i] * a[i]) >> 2;
}
/* { dg-final { scan-assembler-times {\tshrn\t} 1 } } */
/* { dg-final { scan-assembler-times {\tshrn2\t} 1 } } */

View File

@ -0,0 +1,13 @@
/* { dg-do assemble } */
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
#define TYPE short
/* Square each unsigned short element of A, shift the product right by 2 and
   store it into the short-sized element of D.  The product is computed
   after the integer promotions, so the store narrows the shifted value;
   this is the shift-then-narrow shape that the scan-assembler directives in
   this file expect to be emitted as exactly one shrn and one shrn2.  */
void foo (unsigned TYPE * restrict a, TYPE * restrict d, int n)
{
for( int i = 0; i < n; i++ )
d[i] = (a[i] * a[i]) >> 2;
}
/* { dg-final { scan-assembler-times {\tshrn\t} 1 } } */
/* { dg-final { scan-assembler-times {\tshrn2\t} 1 } } */

View File

@ -0,0 +1,13 @@
/* { dg-do assemble } */
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
#define TYPE int
/* Shift each unsigned long long element of A right by 3 and store it into
   the int-sized element of D.  There is no multiply here: the narrowing
   comes solely from storing the shifted value into the narrower TYPE.  The
   scan-assembler directives in this file expect exactly one shrn and one
   shrn2.  */
void foo (unsigned long long * restrict a, TYPE * restrict d, int n)
{
for( int i = 0; i < n; i++ )
d[i] = a[i] >> 3;
}
/* { dg-final { scan-assembler-times {\tshrn\t} 1 } } */
/* { dg-final { scan-assembler-times {\tshrn2\t} 1 } } */

View File

@ -0,0 +1,13 @@
/* { dg-do assemble } */
/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
#define TYPE long long
/* Negative test: square each unsigned long long element of A, shift the
   product right by 2 and store it into a long long element of D.  Source
   and destination elements have the same width, so no narrowing takes
   place; the scan-assembler-not directives in this file check that neither
   shrn nor shrn2 is emitted.  */
void foo (unsigned TYPE * restrict a, TYPE * restrict d, int n)
{
for( int i = 0; i < n; i++ )
d[i] = (a[i] * a[i]) >> 2;
}
/* { dg-final { scan-assembler-not {\tshrn\t} } } */
/* { dg-final { scan-assembler-not {\tshrn2\t} } } */