[3/3] [AArch64][vect] vec_widen_lshift pattern
Add aarch64 vec_widen_lshift_lo/hi patterns and fix the bug they trigger in the mid-end. The pattern takes one vector with N elements of size S, shifts each element left by the element width and stores the results as N elements of size 2*S (in 2 result vectors). The aarch64 backend implements this with the shll/shll2 instruction pair. gcc/ChangeLog: * config/aarch64/aarch64-simd.md: Add vec_widen_lshift_hi/lo<mode> patterns. * tree-vect-stmts.c (vectorizable_conversion): Fix for widen_lshift case. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vect-widen-lshift.c: New test.
This commit is contained in:
parent
9fc9573f9a
commit
27842e2a1e
@ -4664,8 +4664,74 @@
|
||||
[(set_attr "type" "neon_sat_shift_reg<q>")]
|
||||
)
|
||||
|
||||
;; Widening shift-left of one half of the VQW vector in operand 1 by the
;; immediate in operand 2, producing a <VWIDE> result in operand 0.
;; The expander builds a half-selecting PARALLEL (third argument `false')
;; and emits aarch64_<sur>shll<mode>_internal, whose selector operand is
;; matched by vect_par_cnst_lo_half.
(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
                         (match_operand:SI 2
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    /* Lane selector handed to the insn as its operand 2.  */
    rtx lo_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
                                                     lo_half, operands[2]));
    DONE;
  }
)
|
||||
|
||||
;; Widening shift-left of one half of the VQW vector in operand 1 by the
;; immediate in operand 2, producing a <VWIDE> result in operand 0.
;; The expander builds a half-selecting PARALLEL (third argument `true')
;; and emits aarch64_<sur>shll2<mode>_internal, whose selector operand is
;; matched by vect_par_cnst_hi_half.
;; NOTE(review): operand 2 is only checked with immediate_operand here,
;; whereas the emitted insn requires
;; aarch64_simd_shift_imm_bitsize_<ve_mode> -- confirm the looser
;; predicate is intentional.
(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
                         (match_operand:SI 2
                           "immediate_operand" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    /* Lane selector handed to the insn as its operand 2.  */
    rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
                                                      hi_half, operands[2]));
    DONE;
  }
)
|
||||
|
||||
;; vshll_n
|
||||
|
||||
;; Shift-left-long on the <VHALF> part of operand 1 selected by operand 2
;; (a PARALLEL accepted by vect_par_cnst_lo_half).  Operand 3 is the
;; immediate shift amount; operand 0 receives the <VWIDE> result.
(define_insn "aarch64_<sur>shll<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
                         (match_operand:SI 3
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    /* A shift amount equal to the element bit size is emitted with the
       plain shll mnemonic; any other amount uses the <sur>-prefixed one.  */
    return (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)
            ? "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"
            : "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3");
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
|
||||
|
||||
;; Shift-left-long on the <VHALF> part of operand 1 selected by operand 2
;; (a PARALLEL accepted by vect_par_cnst_hi_half).  Operand 3 is the
;; immediate shift amount; operand 0 receives the <VWIDE> result.
(define_insn "aarch64_<sur>shll2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
                         (match_operand:SI 3
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    /* A shift amount equal to the element bit size is emitted with the
       plain shll2 mnemonic; any other amount uses the <sur>-prefixed one.  */
    return (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)
            ? "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"
            : "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3");
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
|
||||
|
||||
(define_insn "aarch64_<sur>shll_n<mode>"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
|
||||
(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
|
||||
|
62
gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
Normal file
62
gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 -save-temps" } */
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#pragma GCC target "+nosve"
|
||||
|
||||
#define ARR_SIZE 1024
|
||||
|
||||
/* Should produce an shll, shll2 pair.  */
|
||||
void sshll_opt (int32_t *foo, int16_t *a, int16_t *b)
|
||||
{
|
||||
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||||
{
|
||||
foo[i] = a[i] << 16;
|
||||
foo[i+1] = a[i+1] << 16;
|
||||
foo[i+2] = a[i+2] << 16;
|
||||
foo[i+3] = a[i+3] << 16;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((optimize (0)))
|
||||
void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b)
|
||||
{
|
||||
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||||
{
|
||||
foo[i] = a[i] << 16;
|
||||
foo[i+1] = a[i+1] << 16;
|
||||
foo[i+2] = a[i+2] << 16;
|
||||
foo[i+3] = a[i+3] << 16;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void __attribute__((optimize (0)))
|
||||
init(uint16_t *a, uint16_t *b)
|
||||
{
|
||||
for( int i = 0; i < ARR_SIZE;i++)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = 2*i;
|
||||
}
|
||||
}
|
||||
|
||||
int __attribute__((optimize (0)))
|
||||
main()
|
||||
{
|
||||
uint32_t foo_arr[ARR_SIZE];
|
||||
uint32_t bar_arr[ARR_SIZE];
|
||||
uint16_t a[ARR_SIZE];
|
||||
uint16_t b[ARR_SIZE];
|
||||
|
||||
init(a, b);
|
||||
sshll_opt(foo_arr, a, b);
|
||||
sshll_nonopt(bar_arr, a, b);
|
||||
if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tshll\t} 1} } */
|
||||
/* { dg-final { scan-assembler-times {\tshll2\t} 1} } */
|
@ -4935,8 +4935,9 @@ vectorizable_conversion (vec_info *vinfo,
|
||||
&vec_oprnds1);
|
||||
if (code == WIDEN_LSHIFT_EXPR)
|
||||
{
|
||||
vec_oprnds1.create (ncopies * ninputs);
|
||||
for (i = 0; i < ncopies * ninputs; ++i)
|
||||
int oprnds_size = vec_oprnds0.length ();
|
||||
vec_oprnds1.create (oprnds_size);
|
||||
for (i = 0; i < oprnds_size; ++i)
|
||||
vec_oprnds1.quick_push (op1);
|
||||
}
|
||||
/* Arguments are ready. Create the new vector stmts. */
|
||||
|
Loading…
Reference in New Issue
Block a user